% Bibliography database: ten records (journal articles, conference papers,
% books/reports and one PhD thesis). One field per line; text outside the
% entries is ignored by BibTeX and serves as commentary.

% Journal article (Behavioural public policy, 2021).
@article{AckfeldRohloffRzepka2021,
  author    = {Ackfeld, Viola and Rohloff, Tobias and Rzepka, Sylvi},
  title     = {Increasing personal data contributions for the greater public good},
  journal   = {Behavioural public policy},
  publisher = {Cambridge University Press},
  address   = {Cambridge},
  issn      = {2398-063X},
  doi       = {10.1017/bpp.2021.39},
  pages     = {1--27},
  year      = {2021},
  abstract  = {Personal data increasingly serve as inputs to public goods. Like other types of contributions to public goods, personal data are likely to be underprovided. We investigate whether classical remedies to underprovision are also applicable to personal data and whether the privacy-sensitive nature of personal data must be additionally accounted for. In a randomized field experiment on a public online education platform, we prompt users to complete their profiles with personal information. Compared to a control message, we find that making public benefits salient increases the number of personal data contributions significantly. This effect is even stronger when additionally emphasizing privacy protection, especially for sensitive information. Our results further suggest that emphasis on both public benefits and privacy protection attracts personal data from a more diverse set of contributors.},
  language  = {en},
}

% Journal article (European journal of public health, vol. 30); single page V125.
@article{AdnanSrsicVenticichetal.2020,
  author    = {Adnan, Hassan Sami and Srsic, Amanda and Venticich, Pete Milos and Townend, David M. R.},
  title     = {Using {AI} for mental health analysis and prediction in school surveys},
  journal   = {European journal of public health},
  volume    = {30},
  publisher = {Oxford Univ. Press},
  address   = {Oxford [u.a.]},
  issn      = {1101-1262},
  doi       = {10.1093/eurpub/ckaa165.336},
  pages     = {V125},
  year      = {2020},
  abstract  = {Background: Childhood and adolescence are critical stages of life for mental health and well-being. Schools are a key setting for mental health promotion and illness prevention. One in five children and adolescents have a mental disorder, about half of mental disorders beginning before the age of 14. Beneficial and explainable artificial intelligence can replace current paper-based and online approaches to school mental health surveys. This can enhance data acquisition, interoperability, data driven analysis, trust and compliance. This paper presents a model for using chatbots for non-obtrusive data collection and supervised machine learning models for data analysis; and discusses ethical considerations pertaining to the use of these models. Methods: For data acquisition, the proposed model uses chatbots which interact with students. The conversation log acts as the source of raw data for the machine learning. Pre-processing of the data is automated by filtering for keywords and phrases. Existing survey results, obtained through current paper-based data collection methods, are evaluated by domain experts (health professionals). These can be used to create a test dataset to validate the machine learning models. Supervised learning can then be deployed to classify specific behaviour and mental health patterns. Results: We present a model that can be used to improve upon current paper-based data collection and manual data analysis methods. An open-source GitHub repository contains necessary tools and components of this model. Privacy is respected through rigorous observance of confidentiality and data protection requirements. Critical reflection on these ethics and law aspects is included in the project. Conclusions: This model strengthens mental health surveillance in schools. The same tools and components could be applied to other public health data. Future extensions of this model could also incorporate unsupervised learning to find clusters and patterns of unknown effects.},
  language  = {en},
}

% Conference paper (CHI 2023 extended abstracts).
% Accented letters use the brace-wrapped ASCII form, e.g. {\'a}, so BibTeX
% treats each one as a single character.
@inproceedings{DiazFerreyraShahiTonyetal.2023,
  author    = {Diaz Ferreyra, Nicol{\'a}s Emilio and Shahi, Gautam Kishore and Tony, Catherine and Stieglitz, Stefan and Scandariato, Riccardo},
  title     = {Regret, delete, (do not) repeat},
  booktitle = {Extended abstracts of the 2023 {CHI} conference on human factors in computing systems},
  editor    = {Schmidt, Albrecht and V{\"a}{\"a}n{\"a}nen, Kaisa and Goyal, Tesh and Kristensson, Per Ola and Peters, Anicia},
  publisher = {ACM},
  address   = {New York, NY},
  isbn      = {978-1-4503-9422-2},
  doi       = {10.1145/3544549.3585583},
  pages     = {1--7},
  year      = {2023},
  abstract  = {During the outbreak of the COVID-19 pandemic, many people shared their symptoms across Online Social Networks (OSNs) like Twitter, hoping for others' advice or moral support. Prior studies have shown that those who disclose health-related information across OSNs often tend to regret it and delete their publications afterwards. Hence, deleted posts containing sensitive data can be seen as manifestations of online regrets. In this work, we present an analysis of deleted content on Twitter during the outbreak of the COVID-19 pandemic. For this, we collected more than 3.67 million tweets describing COVID-19 symptoms (e.g., fever, cough, and fatigue) posted between January and April 2020. We observed that around 24\% of the tweets containing personal pronouns were deleted either by their authors or by the platform after one year. As a practical application of the resulting dataset, we explored its suitability for the automatic classification of regrettable content on Twitter.},
  language  = {en},
}

% Conference paper (HICSS 51, 2018).
% NOTE(review): address is given as the city of the publishing office
% (assumed Honolulu for the Manoa campus) -- confirm.
@inproceedings{ErmakovaFabianBenderetal.2018,
  author    = {Ermakova, Tatiana and Fabian, Benjamin and Bender, Benedict and Klimek, Kerstin},
  title     = {Web Tracking},
  booktitle = {Proceedings of the Annual Hawaii International Conference on System Sciences ({HICSS} 51)},
  publisher = {HICSS Conference Office University of Hawaii at Manoa},
  address   = {Honolulu, HI},
  issn      = {2572-6862},
  doi       = {10.24251/HICSS.2018.596},
  pages     = {4732--4741},
  year      = {2018},
  abstract  = {Web tracking seems to become ubiquitous in online business and leads to increased privacy concerns of users. This paper provides an overview over the current state of the art of web-tracking research, aiming to reveal the relevance and methodologies of this research area and creates a foundation for future work. In particular, this study addresses the following research questions: What methods are followed? What results have been achieved so far? What are potential future research areas? For these goals, a structured literature review based upon an established methodological framework is conducted. The identified articles are investigated with respect to the applied research methodologies and the aspects of web tracking they emphasize.},
  language  = {en},
}

% Journal article (Applied clinical informatics, vol. 7).
@article{ErmakovaFabianZarnekow2016,
  author    = {Ermakova, Tatiana and Fabian, Benjamin and Zarnekow, Ruediger},
  title     = {Improving Individual Acceptance of Health Clouds through Confidentiality Assurance},
  journal   = {Applied clinical informatics},
  volume    = {7},
  publisher = {Schattauer},
  address   = {Stuttgart},
  issn      = {1869-0327},
  doi       = {10.4338/ACI-2016-07-RA-0107},
  pages     = {983--993},
  year      = {2016},
  abstract  = {Background: Cloud computing promises to essentially improve healthcare delivery performance. However, shifting sensitive medical records to third-party cloud providers could create an adoption hurdle because of security and privacy concerns. Methods: We empirically investigate our research question by a survey with over 260 full responses. For the setting with a high confidentiality assurance, we base on a recent multi-cloud architecture which provides very high confidentiality assurance through a secret-sharing mechanism: Health information is cryptographically encoded and distributed in a way that no single and no small group of cloud providers is able to decode it.},
  language  = {en},
}

% Book (symposium proceedings, report number 122).
% The university is recorded under `institution` because an entry can carry
% only one `publisher`; BibTeX ignores a repeated field with a warning.
@book{vanderWaltOdunAyoBastianetal.2018,
  author      = {van der Walt, Estee and Odun-Ayo, Isaac and Bastian, Matthias and Eldin Elsaid, Mohamed Esam},
  title       = {Proceedings of the Fifth {HPI} Cloud Symposium ``Operating the Cloud'' 2017},
  number      = {122},
  publisher   = {Universit{\"a}tsverlag Potsdam},
  address     = {Potsdam},
  institution = {Universit{\"a}t Potsdam},
  isbn        = {978-3-86956-432-6},
  issn        = {1613-5652},
  url         = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-411330},
  pages       = {70},
  year        = {2018},
  abstract    = {Every year, the Hasso Plattner Institute (HPI) invites guests from industry and academia to a collaborative scientific workshop on the topic Operating the Cloud. Our goal is to provide a forum for the exchange of knowledge and experience between industry and academia. Co-located with the event is the HPI's Future SOC Lab day, which offers an additional attractive and conducive environment for scientific and industry related discussions. Operating the Cloud aims to be a platform for productive interactions of innovative ideas, visions, and upcoming technologies in the field of cloud operation and administration. In these proceedings, the results of the fifth HPI cloud symposium Operating the Cloud 2017 are published. We thank the authors for exciting presentations and insights into their current work and research. Moreover, we look forward to more interesting submissions for the upcoming symposium in 2018.},
  language    = {en},
}

% PhD thesis (Universitaet Potsdam, 2017).
% NOTE(review): the abstract reads as truncated near
% "predict spatio-in the user-specific data"; the text is kept verbatim --
% confirm against the source record.
@phdthesis{Meier2017,
  author   = {Meier, Sebastian},
  title    = {Personal Big Data},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-406696},
  school   = {Universit{\"a}t Potsdam},
  pages    = {xxiv, 133},
  year     = {2017},
  abstract = {Many users of cloud-based services are concerned about questions of data privacy. At the same time, they want to benefit from smart data-driven services, which require insight into a person's individual behaviour. The modus operandi of user modelling is that data is sent to a remote server where the model is constructed and merged with other users' data. This thesis proposes selective cloud computing, an alternative approach, in which the user model is constructed on the client-side and only an abstracted generalised version of the model is shared with the remote services. In order to demonstrate the applicability of this approach, the thesis builds an exemplary client-side user modelling technique. As this thesis is carried out in the area of Geoinformatics and spatio-temporal data is particularly sensitive, the application domain for this experiment is the analysis and prediction of a user's spatio-temporal behaviour. The user modelling technique is grounded in an innovative conceptual model, which builds upon spatial network theory combined with time-geography. The spatio-temporal constraints of time-geography are applied to the network structure in order to create individual spatio-temporal action spaces. This concept is translated into a novel algorithmic user modelling approach which is solely driven by the user's own spatio-temporal trajectory data that is generated by the user's smartphone. While modern smartphones offer a rich variety of sensory data, this thesis only makes use of spatio-temporal trajectory data, enriched by activity classification, as the input and foundation for the algorithmic model. The algorithmic model consists of three basal components: locations (vertices), trips (edges), and clusters (neighbourhoods). After preprocessing the incoming trajectory data in order to identify locations, user feedback is used to train an artificial neural network to learn temporal patterns for certain location types (e.g. work, home, bus stop, etc.). This Artificial Neural Network (ANN) is used to automatically detect future location types by their spatio-temporal patterns. The same is done in order to predict the duration of stay at a certain location. Experiments revealed that neural nets were the most successful statistical and machine learning tool to detect those patterns. The location type identification algorithm reached an accuracy of 87.69\%, the duration prediction on binned data was less successful and deviated by an average of 0.69 bins. A challenge for the location type classification, as well as for the subsequent components, was the imbalance of trips and connections as well as the low accuracy of the trajectory data. The imbalance is grounded in the fact that most users exhibit strong habitual patterns (e.g. home > work), while other patterns are rather rare by comparison. The accuracy problem derives from the energy-saving location sampling mode, which creates less accurate results. Those locations are then used to build a network that represents the user's spatio-temporal behaviour. An initial untrained ANN to predict movement on the network only reached 46\% average accuracy. Only lowering the number of included edges, focusing on more common trips, increased the performance. In order to further improve the algorithm, the spatial trajectories were introduced into the predictions. To overcome the accuracy problem, trips between locations were clustered into so-called spatial corridors, which were intersected with the user's current trajectory. The resulting intersected trips were ranked through a k-nearest-neighbour algorithm. This increased the performance to 56\%. In a final step, a combination of a network and spatial clustering algorithm was built in order to create clusters, therein reducing the variety of possible trips. By only predicting the destination cluster instead of the exact location, it is possible to increase the performance to 75\% including all classes. A final set of components shows in two exemplary ways how to deduce additional inferences from the underlying spatio-temporal data. The first example presents a novel concept for predicting the 'potential memorisation index' for a certain location. The index is based on a cognitive model which derives the index from the user's activity data in that area. The second example embeds each location in its urban fabric and thereby enriches its cluster's metadata by further describing the temporal-semantic activity in an area (e.g. going to restaurants at noon). The success of the client-side classification and prediction approach, despite the challenges of inaccurate and imbalanced data, supports the claimed benefits of the client-side modelling concept. Since modern data-driven services at some point do need to receive user data, the thesis' computational model concludes with a concept for applying generalisation to semantic, temporal, and spatial data before sharing it with the remote service in order to comply with the overall goal to improve data privacy. In this context, the potentials of ensemble training (in regards to ANNs) are discussed in order to highlight the potential of only sharing the trained ANN instead of the raw input data. While the results of our evaluation support the assets of the proposed framework, there are two important downsides of our approach compared to server-side modelling. First, both of these server-side advantages are rooted in the server's access to multiple users' data. This allows a remote service to predict spatio-in the user-specific data, which represents the second downside. While minor classes will likely be minor classes in a bigger dataset as well, for each class, there will still be more variety than in the user-specific dataset. The author emphasises that the approach presented in this work holds the potential to change the privacy paradigm in modern data-driven services. Finding combinations of client- and server-side modelling could prove a promising new path for data-driven innovation. Beyond the technological perspective, throughout the thesis the author also offers a critical view on the data- and technology-driven development of this work. By introducing the client-side modelling with user-specific artificial neural networks, users generate their own algorithm. Those user-specific algorithms are influenced less by generalised biases or developers' prejudices. Therefore, the user develops a more diverse and individual perspective through his or her user model. This concept picks up the idea of critical cartography, which questions the status quo of how space is perceived and represented.},
  language = {en},
}

% Journal article (Innovation, vol. 26, double issue 1--2).
% Braces in the title protect the acronym and the proper noun from
% style-driven lowercasing.
@article{MoellersHaelterlein2013,
  author    = {M{\"o}llers, Norma Tamaria and H{\"a}lterlein, Jens},
  title     = {Privacy issues in public discourse: the case of ``smart'' {CCTV} in {Germany}},
  journal   = {Innovation: the European journal of social sciences},
  volume    = {26},
  number    = {1--2},
  publisher = {Routledge, Taylor \& Francis Group},
  address   = {Abingdon},
  issn      = {1351-1610},
  doi       = {10.1080/13511610.2013.723396},
  pages     = {57--70},
  year      = {2013},
  abstract  = {In dealing with surveillance, scholars have widely agreed to refute privacy as an analytical concept and defining theme. Nonetheless, in public debates, surveillance technologies are still confronted with issues of privacy, and privacy therefore endures as an empirical subject of research on surveillance. Drawing from our analysis of public discourse of so-called smart closed-circuit television (CCTV) in Germany, we propose to use a sociology of knowledge perspective to analyze privacy in order to understand how it is socially constructed and negotiated. Our data comprise 117 documents, covering all publicly available documents between 2006 and 2010 that we were able to obtain. We found privacy to be the only form of critique in the struggle for the legitimate definition of smart CCTV. In this paper, we discuss the implications our preliminary findings have for the relationship between privacy issues and surveillance technology and conclude with suggestions of how this relationship might be further investigated as paradoxical, yet constitutive.},
  language  = {en},
}

% Edited book (report number 87); has editors but no author.
% The university is recorded under `institution` because an entry can carry
% only one `publisher`.
@book{OPUS4-6813,
  title       = {Cloud security mechanisms},
  number      = {87},
  editor      = {Neuhaus, Christian and Polze, Andreas},
  publisher   = {Universit{\"a}tsverlag Potsdam},
  address     = {Potsdam},
  institution = {Universit{\"a}t Potsdam},
  isbn        = {978-3-86956-281-0},
  issn        = {1613-5652},
  url         = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-68168},
  pages       = {78},
  year        = {2014},
  abstract    = {Cloud computing has brought great benefits in cost and flexibility for provisioning services. The greatest challenge of cloud computing remains however the question of security. The current standard tools in access control mechanisms and cryptography can only partly solve the security challenges of cloud infrastructures. In the recent years of research in security and cryptography, novel mechanisms, protocols and algorithms have emerged that offer new ways to create secure services atop cloud infrastructures. This report provides introductions to a selection of security mechanisms that were part of the "Cloud Security Mechanisms" seminar in summer term 2013 at HPI.},
  language    = {en},
}

% Book/report (2011).
% The university is recorded under `institution` because an entry can carry
% only one `publisher`.
@book{NeuhausPolzeChowdhuryy2011,
  author      = {Neuhaus, Christian and Polze, Andreas and Chowdhuryy, Mohammad M. R.},
  title       = {Survey on healthcare {IT} systems: standards, regulations and security},
  publisher   = {Universit{\"a}tsverlag Potsdam},
  address     = {Potsdam},
  institution = {Universit{\"a}t Potsdam},
  isbn        = {978-3-86956-128-8},
  url         = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-51463},
  pages       = {53},
  year        = {2011},
  abstract    = {IT systems for healthcare are a complex and exciting field. On the one hand, there is a vast number of improvements and work alleviations that computers can bring to everyday healthcare. Some ways of treatment, diagnoses and organisational tasks were even made possible by computer usage in the first place. On the other hand, there are many factors that encumber computer usage and make development of IT systems for healthcare a challenging, sometimes even frustrating task. These factors are not solely technology-related, but just as well social or economical conditions. This report describes some of the idiosyncrasies of IT systems in the healthcare domain, with a special focus on legal regulations, standards and security.},
  language    = {en},
}