@misc{SeewannVerwiebeBuderetal.2022, author = {Seewann, Lena and Verwiebe, Roland and Buder, Claudia and Fritsch, Nina-Sophie}, title = {"Broadcast your gender."}, series = {Zweitver{\"o}ffentlichungen der Universit{\"a}t Potsdam : Wirtschafts- und Sozialwissenschaftliche Reihe}, journal = {Zweitver{\"o}ffentlichungen der Universit{\"a}t Potsdam : Wirtschafts- und Sozialwissenschaftliche Reihe}, number = {152}, issn = {1867-5808}, doi = {10.25932/publishup-56628}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-566287}, pages = {16}, year = {2022}, abstract = {Social media platforms provide a large array of behavioral data relevant to social scientific research. However, key information such as the sociodemographic characteristics of agents is often missing. This paper aims to compare four methods of classifying social attributes from text. Specifically, we are interested in estimating the gender of German social media creators. Using the example of a random sample of 200 YouTube channels, we compare several classification methods, namely (1) a survey among university staff, (2) a name dictionary method with the World Gender Name Dictionary as a reference list, (3) an algorithmic approach using the website gender-api.com, and (4) a Multinomial Na{\"i}ve Bayes (MNB) machine learning technique. These different methods identify gender attributes based on YouTube channel names and descriptions in German but are adaptable to other languages. Our contribution evaluates the share of identifiable channels, the accuracy and meaningfulness of the classification, as well as the limits and benefits of each approach. We aim to address methodological challenges connected to classifying gender attributes for YouTube channels, as well as challenges related to reinforcing stereotypes and ethical implications.}, language = {en} } @article{SeewannVerwiebeBuderetal.2022a, author = {Seewann, Lena and Verwiebe, Roland and Buder, Claudia and Fritsch, Nina-Sophie}, title = {"Broadcast your gender."}, series = {Frontiers in Big Data}, journal = {Frontiers in Big Data}, number = {5}, publisher = {Frontiers}, address = {Lausanne, Schweiz}, issn = {2624-909X}, doi = {10.3389/fdata.2022.908636}, pages = {16}, year = {2022}, abstract = {Social media platforms provide a large array of behavioral data relevant to social scientific research. However, key information such as the sociodemographic characteristics of agents is often missing. This paper aims to compare four methods of classifying social attributes from text. Specifically, we are interested in estimating the gender of German social media creators. Using the example of a random sample of 200 YouTube channels, we compare several classification methods, namely (1) a survey among university staff, (2) a name dictionary method with the World Gender Name Dictionary as a reference list, (3) an algorithmic approach using the website gender-api.com, and (4) a Multinomial Na{\"i}ve Bayes (MNB) machine learning technique. These different methods identify gender attributes based on YouTube channel names and descriptions in German but are adaptable to other languages. Our contribution evaluates the share of identifiable channels, the accuracy and meaningfulness of the classification, as well as the limits and benefits of each approach. 
We aim to address methodological challenges connected to classifying gender attributes for YouTube channels, as well as challenges related to reinforcing stereotypes and ethical implications.}, language = {en} } @phdthesis{Haarmann2022, author = {Haarmann, Stephan}, title = {WICKR: A Joint Semantics for Flexible Processes and Data}, doi = {10.25932/publishup-54613}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-546137}, school = {Universit{\"a}t Potsdam}, pages = {xvii, 191}, year = {2022}, abstract = {Knowledge-intensive business processes are flexible and data-driven. Therefore, traditional process modeling languages do not meet their requirements: these languages focus on highly structured processes in which data plays a minor role. As a result, process-oriented information systems fail to assist knowledge workers in executing their processes. We propose a novel case management approach that combines flexible activity-centric processes with data models, and we provide a joint semantics using colored Petri nets. The approach is suited to model, verify, and enact knowledge-intensive processes and can aid the development of information systems that support knowledge work. Knowledge-intensive processes are human-centered, multi-variant, and data-driven. Typical domains include healthcare, insurance, and law. The processes cannot be fully modeled, since the underlying knowledge is too vast and changes too quickly. Thus, models for knowledge-intensive processes are necessarily underspecified. In fact, a case emerges gradually as knowledge workers make informed decisions. Knowledge work imposes special requirements on modeling and managing the respective processes. These include flexibility during design and execution, ad-hoc adaptation to unforeseen situations, and the integration of behavior and data. However, the predominantly used process modeling languages (e.g., BPMN) are unsuited for this task. Therefore, novel modeling languages have been proposed. Many of them focus on activities' data requirements and declarative constraints rather than imperative control flow. Fragment-Based Case Management, for example, combines activity-centric imperative process fragments with declarative data requirements. At runtime, fragments can be combined dynamically, and new ones can be added. Yet, no integrated semantics for flexible activity-centric process models and data models exists. In this thesis, Wickr, a novel case modeling approach extending Fragment-Based Case Management, is presented. It supports batch processing of data, sharing data among cases, and a full-fledged data model with associations and multiplicity constraints. We develop a translational semantics for Wickr targeting (colored) Petri nets. The semantics asserts that a case adheres to the constraints in both the process fragments and the data models. Among other things, multiplicity constraints must not be violated. Furthermore, the semantics is extended to multiple cases that operate on shared data. Wickr shows that the data structure may reflect process behavior and vice versa. Based on its semantics, prototypes for executing and verifying case models showcase the feasibility of Wickr. 
Its applicability to knowledge-intensive and to data-centric processes is evaluated using well-known requirements from related work.}, language = {en} } @phdthesis{Elsaid2022, author = {Elsaid, Mohamed Esameldin Mohamed}, title = {Virtual machines live migration cost modeling and prediction}, doi = {10.25932/publishup-54001}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-540013}, school = {Universit{\"a}t Potsdam}, pages = {xiv, 107}, year = {2022}, abstract = {Dynamic resource management is an essential requirement for private and public cloud computing environments. With dynamic resource management, the assignment of physical resources to virtual resources depends on the actual needs of the applications or running services, which enhances the utilization of the cloud's physical resources and reduces the cost of the offered services. In addition, virtual resources can be moved across different physical resources in the cloud environment without an obvious impact on the production of the running applications or services. This means that the availability of the running services and applications in the cloud is independent of hardware failures, including server, switch, and storage failures. This increases the reliability of using cloud services compared to classical data-center environments. In this thesis, we briefly discuss dynamic resource management and then focus in depth on live migration as the core mechanism of dynamic compute resource management. Live migration is a commonly used and essential feature in cloud and virtual data-center environments. Cloud computing load balancing, power saving, and fault tolerance features all depend on live migration to optimize the usage of virtual and physical resources. As we discuss in this thesis, live migration brings many benefits to cloud and virtual data-center environments; however, the cost of live migration cannot be ignored. This cost includes the migration time, downtime, network overhead, increased power consumption, and CPU overhead. IT administrators often run virtual machine live migrations without any estimate of the migration cost. As a result, resource bottlenecks, higher migration costs, and migration failures might occur. The first problem that we discuss in this thesis is how to model the cost of virtual machine live migration. Secondly, we investigate how to make use of machine learning techniques to help cloud administrators obtain an estimate of this cost before initiating the migration of one or multiple virtual machines. Also, we discuss the optimal timing for live-migrating a specific virtual machine to another server. Finally, we propose practical solutions that cloud administrators can integrate with cloud administration portals to answer the research questions raised above. Our research methodology for achieving the project objectives is to propose empirical models based on VMware test-beds with different benchmark tools. We then make use of machine learning techniques to propose a prediction approach for virtual machine live migration cost. Timing optimization for live migration is also proposed in this thesis, based on the cost prediction and data-center network utilization prediction. Live migration with persistent memory clusters is also discussed at the end of the thesis. 
The cost prediction and timing optimization techniques proposed in this thesis could be practically integrated with the VMware vSphere cluster portal such that IT administrators can use the cost prediction feature and timing optimization option before proceeding with a virtual machine live migration. Testing results show that our proposed approach for VM live migration cost prediction achieves acceptable results, with less than 20\% prediction error, and can be easily implemented and integrated with VMware vSphere as an example of a commonly used resource management portal for virtual data-centers and private cloud environments. The results also show that using our proposed VM migration timing optimization technique could save up to 51\% of the migration time for memory-intensive workloads and up to 27\% of the migration time for network-intensive workloads. This timing optimization technique can be useful for network administrators to save migration time by utilizing higher network rates with a higher probability of success. At the end of this thesis, we discuss persistent memory as a new trend in server memory technology. Persistent memory modes of operation and configurations are discussed in detail to explain how live migration works between servers with different memory configurations. Then, we build a VMware cluster with persistent memory inside servers as well as with DRAM-only servers to show the difference in live migration cost between VMs with DRAM only and VMs with persistent memory inside.}, language = {en} } @article{SpiekermannKrasnovaHinzetal.2022, author = {Spiekermann, Sarah and Krasnova, Hanna and Hinz, Oliver and Baumann, Annika and Benlian, Alexander and Gimpel, Henner and Heimbach, Irina and Koester, Antonia and Maedche, Alexander and Niehaves, Bjoern and Risius, Marten and Trenz, Manuel}, title = {Values and ethics in information systems}, series = {Business \& information systems engineering}, volume = {64}, journal = {Business \& information systems engineering}, number = {2}, publisher = {Springer Gabler}, address = {Wiesbaden}, issn = {2363-7005}, doi = {10.1007/s12599-021-00734-8}, pages = {247 -- 264}, year = {2022}, language = {en} } @phdthesis{Gruener2022, author = {Gr{\"u}ner, Andreas}, title = {Towards practical and trust-enhancing attribute aggregation for self-sovereign identity}, doi = {10.25932/publishup-56745}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-567450}, school = {Universit{\"a}t Potsdam}, pages = {xvii, 175}, year = {2022}, abstract = {Identity management is at the forefront of applications' security posture. It separates the unauthorised user from the legitimate individual. Identity management models have evolved from the isolated to the centralised paradigm and identity federations. Within this advancement, the identity provider emerged as a trusted third party that holds a powerful position. Allen postulated the novel self-sovereign identity paradigm to establish a new balance. Thus, extensive research is required to comprehend its virtues and limitations. Analysing the new paradigm, we initially investigate the blockchain-based self-sovereign identity concept structurally. Moreover, we examine trust requirements in this context by reference to patterns. These patterns comprise major entities linked by a decentralised identity provider. By comparison to the traditional models, we conclude that trust in credential management and authentication is removed. 
Trust-enhancing attribute aggregation based on multiple attribute providers provokes a further trust shift. Subsequently, we formalise attribute assurance trust modelling by a metaframework. It encompasses the attestation and trust network as well as the trust decision process, including the trust function, as central components. A secure attribute assurance trust model depends on the security of the trust function. The trust function should consider high trust values and several attribute authorities. Furthermore, we evaluate classification, conceptual study, practical analysis and simulation as assessment strategies for trust models. For realising trust-enhancing attribute aggregation, we propose a probabilistic approach. The method exhibits the principal characteristics of correctness and validity. These values are combined for one provider and subsequently for multiple issuers. We embed this trust function in a model within the self-sovereign identity ecosystem. To practically apply the trust function and solve several challenges for the service provider that arise from adopting self-sovereign identity solutions, we conceptualise and implement an identity broker. The mediator applies a component-based architecture to abstract from a single solution. Standard identity and access management protocols form the interface for applications. We can conclude that the broker's usage on the side of the service provider does not undermine self-sovereign principles, but fosters the advancement of the ecosystem. The identity broker is applied to sample web applications with distinct attribute requirements to showcase its usefulness for authentication and attribute-based access control within a case study.}, language = {en} } @book{KlinkeVerhoevenRothetal.2022, author = {Klinke, Paula and Verhoeven, Silvan and Roth, Felix and Hagemann, Linus and Alnawa, Tarik and Lincke, Jens and Rein, Patrick and Hirschfeld, Robert}, title = {Tool support for collaborative creation of interactive storytelling media}, number = {141}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-521-7}, issn = {1613-5652}, doi = {10.25932/publishup-51857}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-518570}, publisher = {Universit{\"a}t Potsdam}, pages = {x, 167}, year = {2022}, abstract = {Scrollytellings are an innovative form of web content. Combining the benefits of books, images, movies, and video games, they are a tool to tell compelling stories and provide excellent learning opportunities. Because of this multi-modality, creating high-quality scrollytellings is not an easy task. Different professions, such as content designers, graphics designers, and developers, need to collaborate to get the best out of the possibilities the scrollytelling format provides. Collaboration unlocks great potential. However, content designers cannot create scrollytellings directly and always need to consult with developers to implement their vision. This can result in misunderstandings. Often, the resulting scrollytelling will not match the designer's vision sufficiently, causing unnecessary iterations. Our project partner Typeshift specializes in the creation of individualized scrollytellings for their clients. The existing solutions for authoring interactive content that we examined are not optimally suited for creating highly customized scrollytellings while still allowing all their elements to be manipulated programmatically. 
Based on their experience and expertise, we developed an editor for authoring scrollytellings in the lively.next live-programming environment. In this environment, a graphical user interface for content design is combined with powerful possibilities for programming behavior via the morphic system. The editor allows content designers to take on large parts of the creation process of scrollytellings on their own, such as creating the visible elements, animating content, and fine-tuning the scrollytelling. Hence, developers can focus on interactive elements such as simulations and games. Together with Typeshift, we evaluated the tool by recreating an existing scrollytelling and identified possible future enhancements. Our editor streamlines the creation process of scrollytellings. Content designers and developers can now both work on the same scrollytelling. Because the editor is embedded in the lively.next environment, both can work with a set of tools familiar to them. Thus, we mitigate unnecessary iterations and misunderstandings by enabling content designers to realize large parts of their vision of a scrollytelling on their own. Developers can add advanced and individual behavior. In this way, developers and content designers benefit from a clearer distribution of tasks while keeping the benefits of collaboration.}, language = {en} } @phdthesis{Dehnert2022, author = {Dehnert, Maik}, title = {Studies on the Digital Transformation of Incumbent Organizations}, doi = {10.25932/publishup-54832}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-548324}, school = {Universit{\"a}t Potsdam}, pages = {339}, year = {2022}, abstract = {Traditional organizations are strongly encouraged by emerging digital customer behavior and digital competition to transform their businesses for the digital age. Incumbents are particularly exposed to the tension between maintaining and renewing their business model. Banking is one of the industries most affected by digitalization, with a large stream of digital innovations around Fintech. Most research contributions focus on digital innovations, such as Fintech, but there are only a few studies on the related challenges and perspectives of incumbent organizations, such as traditional banks. Against this background, this dissertation examines the specific causes, effects and solutions for traditional banks in digital transformation - an underrepresented research area so far. The first part of the thesis examines how digitalization has changed latent customer expectations in banking and studies the underlying technological drivers of evolving business-to-consumer (B2C) business models. Online consumer reviews are systematized to identify latent concepts of customer behavior and future decision paths as strategic digitalization effects. Furthermore, the service attribute preferences, the impact of influencing factors and the underlying customer segments are uncovered for checking accounts in a discrete choice experiment. The dissertation contributes here to customer behavior research in digital transformation, moving beyond the technology acceptance model. In addition, the dissertation systematizes value proposition types in the evolving discourse around smart products and services as key drivers of business models and market power in the platform economy. 
The second part of the thesis focuses on the effects of digital transformation on the strategy development of financial service providers, which are classified according to their firm performance levels. Standard types are derived based on fuzzy-set qualitative comparative analysis (fsQCA), with facade digitalization as one typical standard type for low-performing incumbent banks that lack a holistic strategic response to digital transformation. Based on this, the contradictory impact of digitalization measures on key business figures is examined for German savings banks, confirming that the shift towards digital customer interaction was not accompanied by new revenue models, diminishing bank profitability. The dissertation further contributes to the discourse on digitalized work designs and the consequences for job perceptions in banking customer advisory. The threefold impact of the IT support perceived in customer interaction on the job satisfaction of customer advisors is disentangled. In the third part of the dissertation, solutions are developed in a design-oriented manner for core action areas of digitalized business models, i.e., data and platforms. A consolidated taxonomy for data-driven business models and a future reference model for digital banking have been developed. The impact of the platform economy is demonstrated here using the example of the market entry by Bigtech. The role-based e3-value modeling is extended by meta-roles and role segments and linked to value co-creation mapping in VDML. In this way, the dissertation extends enterprise modeling research on platform ecosystems and value co-creation using the example of banking.}, language = {en} } @article{ChenLangeAndjelkovicetal.2022, author = {Chen, Junchao and Lange, Thomas and Andjelkovic, Marko and Simevski, Aleksandar and Lu, Li and Krstić, Miloš}, title = {Solar particle event and single event upset prediction from SRAM-based monitor and supervised machine learning}, series = {IEEE transactions on emerging topics in computing / IEEE Computer Society, Institute of Electrical and Electronics Engineers}, volume = {10}, journal = {IEEE transactions on emerging topics in computing / IEEE Computer Society, Institute of Electrical and Electronics Engineers}, number = {2}, publisher = {Institute of Electrical and Electronics Engineers}, address = {[New York, NY]}, issn = {2168-6750}, doi = {10.1109/TETC.2022.3147376}, pages = {564 -- 580}, year = {2022}, abstract = {The intensity of cosmic radiation may differ by over five orders of magnitude within a few hours or days during Solar Particle Events (SPEs), thus increasing the probability of Single Event Upsets (SEUs) in space-borne electronic systems by several orders of magnitude. Therefore, it is vital to enable the early detection of SEU rate changes in order to ensure timely activation of dynamic radiation hardening measures. In this paper, an embedded approach for the prediction of SPEs and the SRAM SEU rate is presented. The proposed solution combines a real-time SRAM-based SEU monitor, an offline-trained machine learning model, and an online learning algorithm for the prediction. 
With respect to the state of the art, our solution brings the following benefits: (1) use of existing on-chip data storage SRAM as a particle detector, thus minimizing the hardware and power overhead, (2) prediction of the SRAM SEU rate one hour in advance, with fine-grained hourly tracking of SEU variations during SPEs as well as under normal conditions, (3) online optimization of the prediction model for enhancing the prediction accuracy during run-time, and (4) negligible cost of the hardware accelerator design for the implementation of the selected machine learning model and online learning algorithm. The proposed design is intended for a highly dependable and self-adaptive multiprocessing system employed in space applications, allowing the radiation mitigation mechanisms to be triggered before the onset of high radiation levels.}, language = {en} } @phdthesis{Melnichenko2022, author = {Melnichenko, Anna}, title = {Selfish Creation of Realistic Networks}, doi = {10.25932/publishup-54814}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-548141}, school = {Universit{\"a}t Potsdam}, pages = {xi, 175}, year = {2022}, abstract = {Complex networks like the Internet or social networks are fundamental parts of our everyday lives. It is essential to understand their structural properties and how these networks are formed. A game-theoretic approach to network design problems has become of high interest in the last decades. The reason is that many real-world networks are the outcomes of decentralized strategic behavior of independent agents without central coordination. Fabrikant, Luthra, Maneva, Papadimitriou, and Schenker proposed a game-theoretic model aiming to explain the formation of Internet-like networks. In this model, called the Network Creation Game, agents are associated with the nodes of a network. Each agent seeks to maximize her centrality by establishing costly connections to other agents. The model is relatively simple but shows a high potential in modeling complex real-world networks. In this thesis, we contribute to the line of research on variants of the Network Creation Game. Inspired by real-world networks, we propose and analyze several novel network creation models. We aim to understand the impact of certain realistic modeling assumptions on the structure of the created networks and the involved agents' behavior. The first natural additional objective that we consider is the network's robustness. We consider a game where the agents seek to maximize their centrality and, at the same time, the stability of the created network against random edge failure. Our second point of interest is a model that incorporates an underlying geometry. We consider a network creation model where the agents correspond to points in some underlying space and where edge lengths are equal to the distances between the endpoints in that space. The geometric setting captures many physical real-world networks like transport networks and fiber-optic communication networks. We focus on the formation of social networks and consider two models that incorporate particular realistic behavior observed in real-world networks. In the first model, we embed anti-preferential attachment link formation. Namely, we assume that the cost of a connection is proportional to the popularity of the targeted agent. Our second model is based on the observation that the probability that two persons connect is inversely proportional to the length of their shortest chain of mutual acquaintances. 
For each of the four models above, we provide a complete game-theoretical analysis. In particular, we focus on distinctive structural properties of the equilibria, the hardness of computing a best response, and the quality of equilibria in comparison to centrally designed, socially optimal networks. We also analyze the game dynamics, i.e., the process of sequential strategic improvements by the agents, and analyze the convergence to an equilibrium state and its properties.}, language = {en} } @article{TalebRohrerBergneretal.2022, author = {Taleb, Aiham and Rohrer, Csaba and Bergner, Benjamin and De Leon, Guilherme and Rodrigues, Jonas Almeida and Schwendicke, Falk and Lippert, Christoph and Krois, Joachim}, title = {Self-supervised learning methods for label-efficient dental caries classification}, series = {Diagnostics : open access journal}, volume = {12}, journal = {Diagnostics : open access journal}, number = {5}, publisher = {MDPI}, address = {Basel}, issn = {2075-4418}, doi = {10.3390/diagnostics12051237}, pages = {15}, year = {2022}, abstract = {High annotation costs are a substantial bottleneck in applying deep learning architectures to clinically relevant use cases, substantiating the need for algorithms to learn from unlabeled data. In this work, we propose employing self-supervised methods. To that end, we trained models with three self-supervised algorithms on a large corpus of unlabeled dental images, which contained 38K bitewing radiographs (BWRs). We then applied the learned neural network representations to tooth-level dental caries classification, for which we utilized labels extracted from electronic health records (EHRs). Finally, a holdout test-set was established, which consisted of 343 BWRs and was annotated by three dental professionals and approved by a senior dentist. This test-set was used to evaluate the fine-tuned caries classification models. Our experimental results demonstrate the gains obtained by pretraining models using self-supervised algorithms. These include improved caries classification performance (6 p.p. increase in sensitivity) and, most importantly, improved label-efficiency. In other words, the resulting models can be fine-tuned using few labels (annotations). Our results show that using as few as 18 annotations can produce >= 45\% sensitivity, which is comparable to human-level diagnostic performance. This study shows that self-supervision can provide gains in medical image analysis, particularly when obtaining labels is costly.}, language = {en} } @article{Schladebach2022, author = {Schladebach, Marcus}, title = {Satelliten-Megakonstellationen im Weltraumrecht}, series = {Kommunikation \& Recht : K \& R / Beihefter}, journal = {Kommunikation \& Recht : K \& R / Beihefter}, number = {2}, publisher = {dfv-Mediengruppe}, address = {Frankfurt am Main}, issn = {1434-6354}, pages = {26 -- 29}, year = {2022}, language = {de} } @article{vonSteinauSteinrueckMiller2022, author = {von Steinau-Steinr{\"u}ck, Robert and Miller, Denis}, title = {R{\"u}ckzahlungsklauseln f{\"u}r Fortbildungen}, series = {Neue juristische Wochenschrift : NJW Spezial}, volume = {19}, journal = {Neue juristische Wochenschrift : NJW Spezial}, number = {12}, publisher = {C.H. Beck}, address = {M{\"u}nchen}, issn = {1613-4621}, pages = {370 -- 371}, year = {2022}, abstract = {In its ruling of 1 March 2022 (NZA 2022, 780), the German Federal Labor Court (BAG) once again decided on the validity of a repayment clause in a further training agreement. 
The decision joins a body of rulings on this subject that is not easy to survey. We take it as an occasion to give an overview of the case law.}, language = {de} } @phdthesis{Bartz2022, author = {Bartz, Christian}, title = {Reducing the annotation burden: deep learning for optical character recognition using less manual annotations}, doi = {10.25932/publishup-55540}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-555407}, school = {Universit{\"a}t Potsdam}, pages = {xxiv, 183}, year = {2022}, abstract = {Text is a ubiquitous entity in our world and daily life. We encounter it nearly everywhere: in shops, on the street, or in our flats. Nowadays, more and more text is contained in digital images. These images are either taken using cameras, e.g., smartphone cameras, or using scanning devices such as document scanners. The sheer amount of available data, e.g., millions of images taken by Google Streetview, prohibits manual analysis and metadata extraction. Although much progress has been made in the area of optical character recognition (OCR) for printed text in documents, broad areas of OCR are still not fully explored and hold many research challenges. With the mainstream usage of machine learning and especially deep learning, one of the most pressing problems is the availability and acquisition of annotated ground truth for training machine learning models, because obtaining annotated training data via manual annotation is time-consuming and costly. In this thesis, we address the question of how we can reduce the costs of acquiring ground truth annotations for applying state-of-the-art machine learning methods to optical character recognition pipelines. To this end, we investigate how we can reduce the annotation cost by using only a fraction of the typically required ground truth annotations, e.g., for scene text recognition systems. We also investigate how we can use synthetic data to reduce the need for manual annotation work, e.g., in the area of document analysis for archival material. In the area of scene text recognition, we have developed a novel end-to-end scene text recognition system that can be trained using inexact supervision and shows competitive, state-of-the-art performance on standard benchmark datasets for scene text recognition. Our method consists of two independent neural networks, combined using spatial transformer networks. Both networks learn together to perform text localization and text recognition at the same time while only using annotations for the recognition task. We apply our model to end-to-end scene text recognition (meaning localization and recognition of words) and pure scene text recognition without any changes in the network architecture. In the second part of this thesis, we introduce novel approaches for using and generating synthetic data to analyze handwriting in archival data. First, we propose a novel preprocessing method to determine whether a given document page contains any handwriting. We propose a novel data synthesis strategy to train a classification model and show that our data synthesis strategy is viable by evaluating the trained model on real images from an archive. Second, we introduce the new analysis task of handwriting classification. Handwriting classification entails classifying a given handwritten word image into classes such as date, word, or number. 
Such an analysis step allows us to select the best-fitting recognition model for subsequent text recognition; it also allows us to reason about the semantic content of a given document page without the need for fine-grained text recognition and further analysis steps, such as Named Entity Recognition. We show that our proposed approaches work well when trained on synthetic data. Further, we propose a flexible metric learning approach to allow zero-shot classification of classes unseen during the network's training. Last, we propose a novel data synthesis algorithm to train off-the-shelf pixel-wise semantic segmentation networks for documents. Our data synthesis pipeline is based on the famous Style-GAN architecture and can synthesize realistic document images with their corresponding segmentation annotation without the need for any annotated data!}, language = {en} } @article{UlrichLutfiRutzenetal.2022, author = {Ulrich, Jens-Uwe and Lutfi, Ahmad and Rutzen, Kilian and Renard, Bernhard Y.}, title = {ReadBouncer}, series = {Bioinformatics}, volume = {38}, journal = {Bioinformatics}, number = {SUPPL 1}, publisher = {Oxford Univ. Press}, address = {Oxford}, issn = {1367-4803}, doi = {10.1093/bioinformatics/btac223}, pages = {153 -- 160}, year = {2022}, abstract = {Motivation: Nanopore sequencers allow targeted sequencing of interesting nucleotide sequences by rejecting other sequences from individual pores. This feature facilitates the enrichment of low-abundance sequences by depleting overrepresented ones in silico. Existing tools for adaptive sampling either apply signal alignment, which cannot handle human-sized reference sequences, or apply read mapping in sequence space, relying on fast graphics processing unit (GPU) base callers for real-time read rejection. Using nanopore long-read mapping tools is also not optimal when mapping shorter reads as usually analyzed in adaptive sampling applications. Results: Here, we present a new approach for nanopore adaptive sampling that combines fast CPU and GPU base calling with read classification based on Interleaved Bloom Filters. ReadBouncer improves the potential enrichment of low-abundance sequences through its high read classification sensitivity and specificity, outperforming existing tools in the field. It robustly removes even reads belonging to large reference sequences while running on commodity hardware without GPUs, making adaptive sampling accessible for in-field researchers. ReadBouncer also provides a user-friendly interface and installer files for end-users without a bioinformatics background.}, language = {en} } @book{SchneiderMaximovaGiese2022, author = {Schneider, Sven and Maximova, Maria and Giese, Holger}, title = {Probabilistic metric temporal graph logic}, number = {146}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-532-3}, issn = {1613-5652}, doi = {10.25932/publishup-54586}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-545867}, publisher = {Universit{\"a}t Potsdam}, pages = {34}, year = {2022}, abstract = {Cyber-physical systems often encompass complex concurrent behavior with timing constraints and probabilistic failures on demand. The analysis of whether such systems with probabilistic timed behavior adhere to a given specification is essential. 
When the states of the system can be represented by graphs, the rule-based formalism of Probabilistic Timed Graph Transformation Systems (PTGTSs) can be used to suitably capture structure dynamics as well as probabilistic and timed behavior of the system. Model checking support for PTGTSs w.r.t. properties specified using Probabilistic Timed Computation Tree Logic (PTCTL) has already been presented. Moreover, for timed graph-based runtime monitoring, Metric Temporal Graph Logic (MTGL) has been developed for stating metric temporal properties on identified subgraphs and their structural changes over time. In this paper, we (a) extend MTGL to the Probabilistic Metric Temporal Graph Logic (PMTGL) by allowing for the specification of probabilistic properties, (b) adapt our MTGL satisfaction checking approach to PTGTSs, and (c) combine the approaches for PTCTL model checking and MTGL satisfaction checking to obtain a Bounded Model Checking (BMC) approach for PMTGL. In our evaluation, we apply an implementation of our BMC approach in AutoGraph to a running example.}, language = {en} } @article{RoostapourNeumannNeumannetal.2022, author = {Roostapour, Vahid and Neumann, Aneta and Neumann, Frank and Friedrich, Tobias}, title = {Pareto optimization for subset selection with dynamic cost constraints}, series = {Artificial intelligence}, volume = {302}, journal = {Artificial intelligence}, publisher = {Elsevier}, address = {Amsterdam}, issn = {0004-3702}, doi = {10.1016/j.artint.2021.103597}, pages = {17}, year = {2022}, abstract = {We consider the subset selection problem for a function $f$ with constraint bound $B$ that changes over time. Within the area of submodular optimization, various greedy approaches are commonly used. For dynamic environments, we observe that the adaptive variants of these greedy approaches are not able to maintain their approximation quality. Investigating the recently introduced POMC Pareto optimization approach, we show that this algorithm efficiently computes a $\phi = (\alpha_f/2)(1 - 1/e^{\alpha_f})$-approximation, where $\alpha_f$ is the submodularity ratio of $f$, for each possible constraint bound $b \leq B$. Furthermore, we show that POMC is able to adapt its set of solutions quickly in the case that $B$ increases. Our experimental investigations for influence maximization in social networks show the advantage of POMC over generalized greedy algorithms. We also consider EAMC, a new evolutionary algorithm with a polynomial expected time guarantee to maintain a $\phi$-approximation ratio, and NSGA-II with two different population sizes as an advanced multi-objective optimization algorithm, to demonstrate their challenges in optimizing the maximum coverage problem. 
Our empirical analysis shows that, within the same number of evaluations, POMC is able to perform as well as NSGA-II under a linear constraint, while EAMC performs significantly worse than all considered algorithms in most cases.}, language = {en} } @book{MeinelWillemsStaubitzetal.2022, author = {Meinel, Christoph and Willems, Christian and Staubitz, Thomas and Sauer, Dominic and Hagedorn, Christiane}, title = {openHPI}, number = {148}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-544-6}, issn = {1613-5652}, doi = {10.25932/publishup-56020}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-560208}, publisher = {Universit{\"a}t Potsdam}, pages = {125}, year = {2022}, abstract = {On the occasion of the 10th openHPI anniversary, this technical report provides information about the HPI MOOC platform, including its core features, technology, and architecture. In an introduction, the platform family with all partner platforms is presented; these now amount to nine platforms, including openHPI. This section introduces openHPI as an advisor and research partner in various projects. In the second chapter, the functionalities and common course formats of the platform are presented. The functionalities are divided into learner and admin features. The learner features section provides detailed information about performance records, courses, and the learning materials of which a course is composed: videos, texts, and quizzes. In addition, the learning materials can be enriched by adding external exercise tools that communicate with the HPI MOOC platform via the Learning Tools Interoperability (LTI) standard. Furthermore, the concept of peer assessments completes the range of possible learning materials. The section then proceeds with further information on the discussion forum, a fundamental concept of MOOCs compared to traditional e-learning offerings. The section concludes with a description of the quiz recap, learning objectives, mobile applications, gameful learning, and the help desk. The next part of this chapter deals with the admin features. The functionality description is restricted to news and announcements, dashboards and statistics, reporting capabilities, research options with A/B testing, the course feed, and the TransPipe tool to support the process of creating automated or manual subtitles. The platform supports a large variety of additional features, but a detailed description of these features goes beyond the scope of this report. The chapter then elaborates on common course formats and openHPI teaching activities at the HPI. The chapter concludes with some best practices for course design and delivery. The third chapter provides insights into the technology and architecture behind openHPI. A special characteristic of the openHPI project is the conscious decision to operate the complete application from bare metal to platform development. Hence, the chapter starts with a section about the openHPI Cloud, including detailed information about the data center and devices, the used cloud software OpenStack and Ceph, as well as the openHPI Cloud Service provided for the HPI. Afterward, a section on the application technology stack and development tooling describes the application infrastructure components, the used automation, the deployment pipeline, and the tools used for monitoring and alerting. The chapter is concluded with detailed information about the technology stack and concrete platform implementation details. 
The section describes the service-oriented Ruby on Rails application, inter-service communication, and public APIs. It also provides more information on the design system and components used in the application. The section concludes with a discussion of the original microservice architecture, where we share our insights and reasoning for migrating back to a monolithic application. The last chapter provides a summary and an outlook on the future of digital education.}, language = {en} } @book{MeinelWillemsStaubitzetal.2022a, author = {Meinel, Christoph and Willems, Christian and Staubitz, Thomas and Sauer, Dominic and Hagedorn, Christiane}, title = {openHPI}, number = {150}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-546-0}, issn = {1613-5652}, doi = {10.25932/publishup-56179}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-561792}, publisher = {Universit{\"a}t Potsdam}, pages = {86}, year = {2022}, abstract = {On the occasion of the 10th openHPI anniversary, this technical report provides information about the HPI MOOC platform, including its core features, technology, and architecture. In an introduction, the platform family with all partner platforms is presented; including openHPI, these currently amount to nine platforms. This section also shows how openHPI acts as an advisor and research partner in various projects. The second chapter presents the functionalities and common course formats of the platform. The functionalities are divided into learner and admin features. The learner features section provides detailed information about performance records, courses, and the learning materials of which a course is composed: videos, texts, and quizzes. In addition, the learning materials can be enriched with external exercise tools that communicate with the HPI MOOC platform via the Learning Tools Interoperability (LTI) standard. The concept of peer assessments rounds out the possible learning materials. The section then goes into the discussion forum, which represents a fundamental difference between MOOCs and traditional e-learning offerings. The section concludes with a description of the quiz recap, learning objectives, mobile applications, gameful learning, and the help desk. The next part of this chapter deals with the admin features. The functionality description is restricted to news and announcements, dashboards and statistics, reporting capabilities, research options with A/B tests, the course feed, and the TransPipe tool for supporting the creation of automated or manual subtitles. The platform also supports a large variety of additional features, but a detailed description of these features would go beyond the scope of this report. The chapter then covers common course formats and openHPI teaching activities at the HPI before closing with some best practices for course design and delivery. To conclude the technical report, the last chapter provides a summary and an outlook on the future of digital education. A special characteristic of the openHPI project is the conscious decision to operate the complete application independently, from the physical network components to the platform development. 
The German variant at hand is an abridged translation of technical report 148 that does not provide insights into the technologies and architecture of openHPI. Interested readers can find detailed information in technical report 148 (the complete English version) about the data center and devices, the cloud software, and the openHPI Cloud Service, as well as about infrastructure application components such as development tools, automation, the deployment pipeline, and monitoring. It also contains further information about the technology stack and concrete implementation details of the platform, including the service-oriented Ruby on Rails application, the communication between services, public APIs, as well as the design system and components. The section closes with a discussion of the original microservice architecture and the migration to a monolithic application.}, language = {de} } @misc{PanzerBenderGronau2022, author = {Panzer, Marcel and Bender, Benedict and Gronau, Norbert}, title = {Neural agent-based production planning and control}, series = {Zweitver{\"o}ffentlichungen der Universit{\"a}t Potsdam : Wirtschafts- und Sozialwissenschaftliche Reihe}, journal = {Zweitver{\"o}ffentlichungen der Universit{\"a}t Potsdam : Wirtschafts- und Sozialwissenschaftliche Reihe}, issn = {1867-5808}, doi = {10.25932/publishup-60477}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-604777}, pages = {26}, year = {2022}, abstract = {Nowadays, production planning and control must cope with mass customization, increased fluctuations in demand, and high competitive pressure. Despite prevailing market risks, planning accuracy and increased adaptability in the event of disruptions or failures must be ensured, while simultaneously optimizing key process indicators. To manage that complex task, neural networks that can process large quantities of high-dimensional data in real time have been widely adopted in recent years. Although these are already extensively deployed in production systems, a systematic review of applications and implemented agent embeddings and architectures has not yet been conducted. The main contribution of this paper is to provide researchers and practitioners with an overview of applications and applied embeddings and to motivate further research in neural agent-based production. Findings indicate that neural agents are not only deployed in diverse applications, but are also increasingly implemented in multi-agent environments or in combination with conventional methods, improving performance compared to benchmarks and reducing dependence on human experience. This not only implies a more sophisticated focus on distributed production resources, but also a broadening of the perspective from a local to a global scale. 
Nevertheless, future research must further increase scalability and reproducibility to guarantee a simplified transfer of results to reality.}, language = {en} } @article{PanzerBenderGronau2022a, author = {Panzer, Marcel and Bender, Benedict and Gronau, Norbert}, title = {Neural agent-based production planning and control}, series = {Journal of Manufacturing Systems}, volume = {65}, journal = {Journal of Manufacturing Systems}, publisher = {Elsevier}, address = {Amsterdam}, issn = {0278-6125}, doi = {10.1016/j.jmsy.2022.10.019}, pages = {743 -- 766}, year = {2022}, abstract = {Nowadays, production planning and control must cope with mass customization, increased fluctuations in demand, and high competitive pressure. Despite prevailing market risks, planning accuracy and increased adaptability in the event of disruptions or failures must be ensured, while simultaneously optimizing key process indicators. To manage that complex task, neural networks that can process large quantities of high-dimensional data in real time have been widely adopted in recent years. Although these are already extensively deployed in production systems, a systematic review of applications and implemented agent embeddings and architectures has not yet been conducted. The main contribution of this paper is to provide researchers and practitioners with an overview of applications and applied embeddings and to motivate further research in neural agent-based production. Findings indicate that neural agents are not only deployed in diverse applications, but are also increasingly implemented in multi-agent environments or in combination with conventional methods, improving performance compared to benchmarks and reducing dependence on human experience. This not only implies a more sophisticated focus on distributed production resources, but also a broadening of the perspective from a local to a global scale. Nevertheless, future research must further increase scalability and reproducibility to guarantee a simplified transfer of results to reality.}, language = {en} } @book{FlottererMaximovaSchneideretal.2022, author = {Flotterer, Boris and Maximova, Maria and Schneider, Sven and Dyck, Johannes and Z{\"o}llner, Christian and Giese, Holger and H{\´e}ly, Christelle and Gaucherel, C{\´e}dric}, title = {Modeling and Formal Analysis of Meta-Ecosystems with Dynamic Structure using Graph Transformation}, series = {Technische Berichte des Hasso-Plattner-Instituts f{\"u}r Digital Engineering an der Universit{\"a}t Potsdam}, journal = {Technische Berichte des Hasso-Plattner-Instituts f{\"u}r Digital Engineering an der Universit{\"a}t Potsdam}, number = {147}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-533-0}, issn = {1613-5652}, doi = {10.25932/publishup-54764}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-547643}, publisher = {Universit{\"a}t Potsdam}, pages = {47}, year = {2022}, abstract = {The dynamics of ecosystems is of crucial importance. Various model-based approaches exist to understand and analyze their internal effects. In this paper, we model the space structure dynamics and ecological dynamics of meta-ecosystems using the formal technique of Graph Transformation (GT for short). We build GT models to describe how a meta-ecosystem (modeled as a graph) can evolve over time (modeled by GT rules) and to analyze these GT models with respect to qualitative properties such as the existence of structural stabilities. 
As a case study, we build three GT models describing the space structure dynamics and ecological dynamics of three different savanna meta-ecosystems. The first GT model considers a savanna meta-ecosystem that is limited in space to two ecosystem patches, whereas the other two GT models consider two savanna meta-ecosystems that are unlimited in the number of ecosystem patches and only differ in one GT rule describing how the space structure of the meta-ecosystem grows. In the first two GT models, the space structure dynamics and ecological dynamics of the meta-ecosystem show two main structural stabilities: the first one based on grassland-savanna-woodland transitions and the second one based on grassland-desert transitions. The transition between these two structural stabilities is driven by high-intensity fires affecting the tree components. In the third GT model, the GT rule for savanna regeneration induces desertification and therefore a collapse of the meta-ecosystem. We believe that GT models provide a complementary avenue to that of existing approaches to rigorously study ecological phenomena.}, language = {en} } @inproceedings{SultanowChircuWuestemannetal.2022, author = {Sultanow, Eldar and Chircu, Alina and W{\"u}stemann, Stefanie and Schwan, Andr{\´e} and Lehmann, Andreas and Sept, Andr{\´e} and Szymaski, Oliver and Venkatesan, Sripriya and Ritterbusch, Georg David and Teichmann, Malte Rolf}, title = {Metaverse opportunities for the public sector}, series = {International Conference on Information Systems 2022 : Special Interest Group on Big Data : Proceedings}, booktitle = {International Conference on Information Systems 2022 : Special Interest Group on Big Data : Proceedings}, publisher = {AIS}, address = {Atlanta}, year = {2022}, abstract = {The metaverse is envisioned as a virtual shared space facilitated by emerging technologies such as virtual reality (VR), augmented reality (AR), the Internet of Things (IoT), 5G, artificial intelligence (AI), big data, spatial computing, and digital twins (Allam et al., 2022; Dwivedi et al., 2022; Ravenscraft, 2022; Wiles, 2022). While still a nascent concept, the metaverse has the potential to "transform the physical world, as well as transport or extend physical activities to a virtual world" (Wiles, 2022). Big data technologies will also be essential in managing the enormous amounts of data created in the metaverse (Sun et al., 2022). Metaverse technologies can offer the public sector a host of benefits, such as simplified information exchange, stronger communication with citizens, better access to public services, or a new virtual economy. Implementations are underway in several cities around the world (Geraghty et al., 2022). In this paper, we analyze metaverse opportunities for the public sector and explore their application in the context of Germany's Federal Employment Agency. Based on an analysis of academic literature and practical examples, we create a capability map for potential metaverse business capabilities for different areas of the public sector (broadly defined). 
These include education (virtual training and simulation, digital campuses that offer not just online instruction but a holistic university campus experience, etc.), tourism (virtual travel to remote locations and museums, virtual festival participation, etc.), health (employee training - as for emergency situations, virtual simulations for patient treatment - for example, for depression or anxiety, etc.), military (virtual training to experience operational scenarios without being exposed to real-world threats, practice strategic decision-making, or gain technical knowledge for operating and repairing equipment, etc.), administrative services (document processing, virtual consultations for citizens, etc.), judiciary (AI decision-making aids, virtual proceedings, etc.), public safety (virtual training for procedural issues, special operations, or unusual situations, etc.), emergency management (training for natural disasters, etc.), and city planning (visualization of future development projects and interactive feedback, traffic management, attraction gamification, etc.), among others. We further identify several metaverse application areas for Germany's Federal Employment Agency. These applications can help it realize the goals of the German government for a digital transformation that enables faster, more effective, and innovative government services. They include training of employees, training of customers, and career coaching for customers. These applications can be implemented using interactive learning games with AI agents, virtual representations of the organizational spaces, and avatars interacting with each other in these spaces. Metaverse applications will both use big data (to design the virtual environments) and generate big data (from virtual interactions). Issues related to data availability, quality, storage, processing (and related computing power requirements), interoperability, sharing, privacy, and security will need to be addressed in these emerging metaverse applications (Sun et al., 2022). Special attention is needed to understand the potential for power inequities (wealth inequity, algorithmic bias, digital exclusion) due to technologies such as VR (Egliston \& Carter, 2021), harmful surveillance practices (Bibri \& Allam, 2022), and undesirable user behavior or negative psychological impacts (Dwivedi et al., 2022). The results of this exploratory study can inform public sector organizations of emerging metaverse opportunities and enable them to develop plans for action as more of the metaverse technologies become a reality. While the metaverse body of research is still small and research agendas are only now starting to emerge (Dwivedi et al., 2022), this study offers a building block for the future development and analysis of metaverse applications.}, language = {en} } @book{DuerschReinMattisetal.2022, author = {D{\"u}rsch, Falco and Rein, Patrick and Mattis, Toni and Hirschfeld, Robert}, title = {Learning from failure}, number = {145}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-528-6}, issn = {1613-5652}, doi = {10.25932/publishup-53755}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-537554}, publisher = {Universit{\"a}t Potsdam}, pages = {87}, year = {2022}, abstract = {Regression testing is a widespread practice in today's software industry to ensure software product quality. 
Developers derive a set of test cases and execute them frequently to ensure that their change did not adversely affect existing functionality. As the software product and its test suite grow, the time to feedback during regression test sessions increases and impedes programmer productivity: developers wait longer for tests to complete, and delays in fault detection render fault removal increasingly difficult. Test case prioritization addresses the problem of long feedback loops by reordering test cases, such that test cases of high failure probability run first, and test case failures become actionable early in the testing process. We ask: given test execution schedules reconstructed from publicly available data, to what extent can their fault detection efficiency be improved, and which technique yields the most efficient test schedules with respect to APFD? To this end, we recover 6200 regression test sessions from the build log files of Travis CI, a popular continuous integration service, and gather 62000 accompanying changelists. We evaluate the efficiency of current test schedules, and examine the prioritization results of state-of-the-art lightweight, history-based heuristics. We propose and evaluate a novel set of prioritization algorithms, which connect software changes and test failures in a matrix-like data structure. Our studies indicate that the optimization potential is substantial, because the existing test plans score only 30\% APFD. The predictive power of past test failures proves to be outstanding: simple heuristics, such as repeating tests with failures in recent sessions, result in efficiency scores of 95\% APFD. The best-performing matrix-based heuristic achieves a similar score of 92.5\% APFD. In contrast to prior approaches, we argue that matrix-based techniques are useful beyond the scope of effective prioritization, and enable a number of use cases involving software maintenance. We validate our findings from continuous integration processes by extending a continuous testing tool within development environments with means of test prioritization, and pose further research questions. We think that our findings are suited to propel adoption of (continuous) testing practices, and that programmers' toolboxes should contain test prioritization as an essential productivity tool.}, language = {en} } @article{vonSteinauSteinrueckHoeltge2022, author = {von Steinau-Steinr{\"u}ck, Robert and H{\"o}ltge, Clara}, title = {Krieg in Europa}, series = {NJW spezial}, volume = {19}, journal = {NJW spezial}, number = {8}, publisher = {C.H. Beck}, address = {M{\"u}nchen}, issn = {1613-4621}, pages = {242 -- 243}, year = {2022}, abstract = {On 24 February 2022, Russia's war of aggression against Ukraine began. Since then, numerous Ukrainian citizens have been fleeing to the European Union every day, many of them to Germany. The immediate priority is securing basic needs such as food, accommodation, and medical care. At the same time, employers are asking themselves how they can employ Ukrainian citizens as quickly as possible.
Wir geben einen {\"U}berblick {\"u}ber die M{\"o}glichkeiten, ukrainische Gefl{\"u}chtete m{\"o}glichst schnell in den deutschen Arbeitsmarkt zu integrieren.}, language = {de} } @book{SchneiderMaximovaGiese2022, author = {Schneider, Sven and Maximova, Maria and Giese, Holger}, title = {Invariant Analysis for Multi-Agent Graph Transformation Systems using k-Induction}, number = {143}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-531-6}, issn = {1613-5652}, doi = {10.25932/publishup-54585}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-545851}, publisher = {Universit{\"a}t Potsdam}, pages = {37}, year = {2022}, abstract = {The analysis of behavioral models such as Graph Transformation Systems (GTSs) is of central importance in model-driven engineering. However, GTSs often result in intractably large or even infinite state spaces and may be equipped with multiple or even infinitely many start graphs. To mitigate these problems, static analysis techniques based on finite symbolic representations of sets of states or paths thereof have been devised. We focus on the technique of k-induction for establishing invariants specified using graph conditions. To this end, k-induction generates symbolic paths backwards from a symbolic state representing a violation of a candidate invariant to gather information on how that violation could have been reached possibly obtaining contradictions to assumed invariants. However, GTSs where multiple agents regularly perform actions independently from each other cannot be analyzed using this technique as of now as the independence among backward steps may prevent the gathering of relevant knowledge altogether. In this paper, we extend k-induction to GTSs with multiple agents thereby supporting a wide range of additional GTSs. As a running example, we consider an unbounded number of shuttles driving on a large-scale track topology, which adjust their velocity to speed limits to avoid derailing. As central contribution, we develop pruning techniques based on causality and independence among backward steps and verify that k-induction remains sound under this adaptation as well as terminates in cases where it did not terminate before.}, language = {en} } @article{BenderKoerppen2022, author = {Bender, Benedict and K{\"o}rppen, Tim}, title = {Integriert statt isoliert}, series = {Digital business : cloud}, volume = {26}, journal = {Digital business : cloud}, number = {1}, publisher = {WIN-Verlag GmbH \& Co. KG}, address = {Vaterstetten}, issn = {2510-344X}, pages = {26 -- 27}, year = {2022}, abstract = {Dass Daten und Analysen Innovationstreiber sind und nicht mehr nur einen Hygienefaktor darstellen, haben viele Unternehmen erkannt. Um Potenziale zu heben, m{\"u}ssen Daten zielf{\"u}hrend integriert werden. Komplexe Systemlandschaften und isolierte Datenbest{\"a}nde erschweren dies. 
Technologien f{\"u}r die erfolgreiche Umsetzung von datengetriebenem Management m{\"u}ssen richtig eingesetzt werden.}, language = {de} } @phdthesis{Plauth2022, author = {Plauth, Max Frederik}, title = {Improving the Accessibility of Heterogeneous System Resources for Application Developers using Programming Abstractions}, doi = {10.25932/publishup-55811}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-558118}, school = {Universit{\"a}t Potsdam}, pages = {ix, 133}, year = {2022}, abstract = {The heterogeneity of today's state-of-the-art computer architectures is confronting application developers with an immense degree of complexity which results from two major challenges. First, developers need to acquire profound knowledge about the programming models or the interaction models associated with each type of heterogeneous system resource to make efficient use thereof. Second, developers must take into account that heterogeneous system resources always need to exchange data with each other in order to work on a problem together. However, this data exchange is always associated with a certain amount of overhead, which is why the amounts of data exchanged should be kept as low as possible. This thesis proposes three programming abstractions to lessen the burdens imposed by these major challenges with the goal of making heterogeneous system resources accessible to a wider range of application developers. The lib842 compression library provides the first method for accessing the compression and decompression facilities of the NX-842 on-chip compression accelerator available in IBM Power CPUs from user space applications running on Linux. Addressing application development of scale-out GPU workloads, the CloudCL framework makes the resources of GPU clusters more accessible by hiding many aspects of distributed computing while enabling application developers to focus on the aspects of the data parallel programming model associated with GPUs. Furthermore, CloudCL is augmented with transparent data compression facilities based on the lib842 library in order to improve the efficiency of data transfers among cluster nodes. The improved data transfer efficiency provided by the integration of transparent data compression yields performance improvements ranging between 1.11x and 2.07x across four data-intensive scale-out GPU workloads. To investigate the impact of programming abstractions for data placement in NUMA systems, a comprehensive evaluation of the PGASUS framework for NUMA-aware C++ application development is conducted. On a wide range of test systems, the evaluation demonstrates that PGASUS does not only improve the developer experience across all workloads, but that it is also capable of outperforming NUMA-agnostic implementations with average performance improvements of 1.56x. 
Based on these programming abstractions, this thesis demonstrates that by providing a sufficient degree of abstraction, the accessibility of heterogeneous system resources can be improved for application developers without occluding performance-critical properties of the underlying hardware.}, language = {en} } @phdthesis{Boeken2022, author = {B{\"o}ken, Bj{\"o}rn}, title = {Improving prediction accuracy using dynamic information}, doi = {10.25932/publishup-58512}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-585125}, school = {Universit{\"a}t Potsdam}, pages = {xii, 160}, year = {2022}, abstract = {Accurately solving classification problems nowadays is likely to be the most relevant machine learning task. Binary classification separating two classes only is algorithmically simpler but has fewer potential applications, as many real-world problems are multi-class. Conversely, separating only a subset of classes simplifies the classification task. Even though existing multi-class machine learning algorithms are very flexible regarding the number of classes, they assume that the target set Y is fixed and cannot be restricted once the training is finished. On the other hand, existing state-of-the-art production environments are becoming increasingly interconnected with the advance of Industry 4.0 and related technologies, such that additional information can simplify the respective classification problems. In light of this, the main aim of this thesis is to introduce dynamic classification, which generalizes multi-class classification such that the target class set can be restricted arbitrarily to a non-empty class subset M of Y at any time between two consecutive predictions. This task is solved by a combination of two algorithmic approaches. First, classifier calibration transforms predictions into posterior probability estimates that are intended to be well calibrated. The analysis provided focuses on monotonic calibration and, in particular, corrects wrong statements that have appeared in the literature. It also reveals that bin-based evaluation metrics, which became popular in recent years, are unjustified and should not be used at all. Next, the validity of Platt scaling, which is the most relevant parametric calibration approach, is analyzed in depth. In particular, its optimality for classifier predictions distributed according to four different families of probability distributions, as well as its equivalence with Beta calibration up to a sigmoidal preprocessing, are proven. For non-monotonic calibration, extended variants of kernel density estimation and the ensemble method EKDE are introduced. Finally, the calibration techniques are evaluated using a simulation study with complete information as well as on a selection of 46 real-world data sets. Building on this, classifier calibration is applied as part of decomposition-based classification, which aims to reduce multi-class problems to simpler (usually binary) prediction tasks. For the involved fusing step performed at prediction time, a new approach based on evidence theory is presented that uses classifier calibration to model mass functions. This allows analyzing decomposition-based classification against a strictly formal background and proving closed-form equations for the overall combinations. Furthermore, the same formalism leads to a consistent integration of dynamic class information, yielding a theoretically justified and computationally tractable dynamic classification model.
The insights gained from this modeling are combined with pairwise coupling, which is one of the most relevant reduction-based classification approaches, such that all individual predictions are combined in a weighted manner. This not only generalizes existing works on pairwise coupling but also enables the integration of dynamic class information. Lastly, a thorough empirical study is performed that compares all newly introduced approaches to existing state-of-the-art techniques. For this, evaluation metrics for dynamic classification are introduced that depend on corresponding sampling strategies. Thereafter, these are applied during a three-part evaluation. First, support vector machines and random forests are applied on 26 data sets from the UCI Machine Learning Repository. Second, two state-of-the-art deep neural networks are evaluated on five benchmark data sets from a relatively recent reference work. Here, computationally feasible strategies to apply the presented algorithms in combination with large-scale models are particularly relevant because a naive application is computationally intractable. Finally, reference data from a real-world process allowing the inclusion of dynamic class information are collected and evaluated. The results show that in combination with support vector machines and random forests, pairwise coupling approaches yield the best results, while in combination with deep neural networks, differences between the different approaches are mostly small to negligible. Most importantly, all results empirically confirm that dynamic classification succeeds in improving the respective prediction accuracies. Therefore, it is crucial to pass dynamic class information in respective applications, which requires an appropriate digital infrastructure.}, language = {en} } @book{FreundRaetschHradilaketal.2022, author = {Freund, Rieke and R{\"a}tsch, Jan Philip and Hradilak, Franziska and Vidic, Benedikt and Heß, Oliver and Lißner, Nils and W{\"o}lert, Hendrik and Lincke, Jens and Beckmann, Tom and Hirschfeld, Robert}, title = {Implementing a crowd-sourced picture archive for Bad Harzburg}, number = {149}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-545-3}, issn = {1613-5652}, doi = {10.25932/publishup-56029}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-560291}, publisher = {Universit{\"a}t Potsdam}, pages = {x, 191}, year = {2022}, abstract = {Pictures are a medium that helps make the past tangible and preserve memories. Without context, they are not able to do so. Pictures are brought to life by their associated stories. However, the older pictures become, the fewer contemporary witnesses can tell these stories. Especially for large, analog picture archives, knowledge and memories are spread over many people. This creates several challenges: First, the pictures must be digitized to save them from decaying and make them available to the public. Since a simple listing of all the pictures is confusing, the pictures should be structured accessibly. Second, known information that makes the stories vivid needs to be added to the pictures. Users should get the opportunity to contribute their knowledge and memories. To make this usable for all interested parties, even for older, less technophile generations, the interface should be intuitive and error-tolerant. The resulting requirements are not covered in their entirety by any existing software solution without losing the intuitive interface or the scalability of the system.
Therefore, we have developed our digital picture archive within the scope of a bachelor project in cooperation with the Bad Harzburg-Stiftung. For the implementation of this web application, we use the UI framework React in the frontend, which communicates via a GraphQL interface with the Content Management System Strapi in the backend. The use of this system enables our project partner to create an efficient process from scanning analog pictures to presenting them to visitors in an organized and annotated way. To customize the solution for both picture delivery and information contribution for our target group, we designed prototypes and evaluated them with people from Bad Harzburg. This helped us gain valuable insights into our system's usability and future challenges as well as requirements. Our web application is already being used daily by our project partner. Over the course of the project, we came up with numerous ideas for additional features to further support the exchange of knowledge.}, language = {en} } @article{GevayRablBressetal.2022, author = {G{\´e}vay, G{\´a}bor E. and Rabl, Tilmann and Breß, Sebastian and Madai-Tahy, Lor{\´a}nd and Quian{\´e}-Ruiz, Jorge-Arnulfo and Markl, Volker}, title = {Imperative or functional control flow handling}, series = {SIGMOD record / Association for Computing Machinery, Special Interest Group on Management of Data}, volume = {51}, journal = {SIGMOD record / Association for Computing Machinery, Special Interest Group on Management of Data}, number = {1}, publisher = {Association for Computing Machinery}, address = {New York}, issn = {0163-5808}, doi = {10.1145/3542700.3542715}, pages = {60 -- 67}, year = {2022}, abstract = {Modern data analysis tasks often involve control flow statements, such as the iterations in PageRank and K-means. To achieve scalability, developers usually implement these tasks in distributed dataflow systems, such as Spark and Flink. Designers of such systems have to choose between providing imperative or functional control flow constructs to users. Imperative constructs are easier to use, but functional constructs are easier to compile to an efficient dataflow job. We propose Mitos, a system where control flow is both easy to use and efficient. Mitos relies on an intermediate representation based on the static single assignment form. This allows us to abstract away from specific control flow constructs and treat any imperative control flow uniformly both when building the dataflow job and when coordinating the distributed execution.}, language = {en} } @misc{MontiRautenstrauchGhanbarietal.2022, author = {Monti, Remo and Rautenstrauch, Pia and Ghanbari, Mahsa and Rani James, Alva and Kirchler, Matthias and Ohler, Uwe and Konigorski, Stefan and Lippert, Christoph}, title = {Identifying interpretable gene-biomarker associations with functionally informed kernel-based tests in 190,000 exomes}, series = {Zweitver{\"o}ffentlichungen der Universit{\"a}t Potsdam : Reihe der Digital Engineering Fakult{\"a}t}, journal = {Zweitver{\"o}ffentlichungen der Universit{\"a}t Potsdam : Reihe der Digital Engineering Fakult{\"a}t}, number = {16}, doi = {10.25932/publishup-58607}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-586078}, pages = {16}, year = {2022}, abstract = {Here we present an exome-wide rare genetic variant association study for 30 blood biomarkers in 191,971 individuals in the UK Biobank.
We compare gene-based association tests for separate functional variant categories to increase interpretability and identify 193 significant gene-biomarker associations. Genes associated with biomarkers were ~ 4.5-fold enriched for conferring Mendelian disorders. In addition to performing weighted gene-based variant collapsing tests, we design and apply variant-category-specific kernel-based tests that integrate quantitative functional variant effect predictions for missense variants, splicing and the binding of RNA-binding proteins. For these tests, we present a computationally efficient combination of the likelihood-ratio and score tests that found 36\% more associations than the score test alone while also controlling the type-1 error. Kernel-based tests identified 13\% more associations than their gene-based collapsing counterparts and had advantages in the presence of gain of function missense variants. We introduce local collapsing by amino acid position for missense variants and use it to interpret associations and identify potential novel gain of function variants in PIEZO1. Our results show the benefits of investigating different functional mechanisms when performing rare-variant association tests, and demonstrate pervasive rare-variant contribution to biomarker variability.}, language = {en} } @article{MontiRautenstrauchGhanbarietal.2022, author = {Monti, Remo and Rautenstrauch, Pia and Ghanbari, Mahsa and Rani James, Alva and Kirchler, Matthias and Ohler, Uwe and Konigorski, Stefan and Lippert, Christoph}, title = {Identifying interpretable gene-biomarker associations with functionally informed kernel-based tests in 190,000 exomes}, series = {Nature Communications}, volume = {13}, journal = {Nature Communications}, publisher = {Nature Publishing Group UK}, address = {London}, issn = {2041-1723}, doi = {10.1038/s41467-022-32864-2}, pages = {16}, year = {2022}, abstract = {Here we present an exome-wide rare genetic variant association study for 30 blood biomarkers in 191,971 individuals in the UK Biobank. We compare gene-based association tests for separate functional variant categories to increase interpretability and identify 193 significant gene-biomarker associations. Genes associated with biomarkers were ~ 4.5-fold enriched for conferring Mendelian disorders. In addition to performing weighted gene-based variant collapsing tests, we design and apply variant-category-specific kernel-based tests that integrate quantitative functional variant effect predictions for missense variants, splicing and the binding of RNA-binding proteins. For these tests, we present a computationally efficient combination of the likelihood-ratio and score tests that found 36\% more associations than the score test alone while also controlling the type-1 error. Kernel-based tests identified 13\% more associations than their gene-based collapsing counterparts and had advantages in the presence of gain of function missense variants. We introduce local collapsing by amino acid position for missense variants and use it to interpret associations and identify potential novel gain of function variants in PIEZO1.
Our results show the benefits of investigating different functional mechanisms when performing rare-variant association tests, and demonstrate pervasive rare-variant contribution to biomarker variability.}, language = {en} } @book{RanaMohapatraSidorovaetal.2022, author = {Rana, Kaushik and Mohapatra, Durga Prasad and Sidorova, Julia and Lundberg, Lars and Sk{\"o}ld, Lars and Lopes Grim, Lu{\´i}s Fernando and Sampaio Gradvohl, Andr{\´e} Leon and Cremerius, Jonas and Siegert, Simon and Weltzien, Anton von and Baldi, Annika and Klessascheck, Finn and Kalancha, Svitlana and Lichtenstein, Tom and Shaabani, Nuhad and Meinel, Christoph and Friedrich, Tobias and Lenzner, Pascal and Schumann, David and Wiese, Ingmar and Sarna, Nicole and Wiese, Lena and Tashkandi, Araek Sami and van der Walt, Est{\´e}e and Eloff, Jan H. P. and Schmidt, Christopher and H{\"u}gle, Johannes and Horschig, Siegfried and Uflacker, Matthias and Najafi, Pejman and Sapegin, Andrey and Cheng, Feng and Stojanovic, Dragan and Stojnev Ilić, Aleksandra and Djordjevic, Igor and Stojanovic, Natalija and Predic, Bratislav and Gonz{\´a}lez-Jim{\´e}nez, Mario and de Lara, Juan and Mischkewitz, Sven and Kainz, Bernhard and van Hoorn, Andr{\´e} and Ferme, Vincenzo and Schulz, Henning and Knigge, Marlene and Hecht, Sonja and Prifti, Loina and Krcmar, Helmut and Fabian, Benjamin and Ermakova, Tatiana and Kelkel, Stefan and Baumann, Annika and Morgenstern, Laura and Plauth, Max and Eberhard, Felix and Wolff, Felix and Polze, Andreas and Cech, Tim and Danz, Noel and Noack, Nele Sina and Pirl, Lukas and Beilharz, Jossekin Jakob and De Oliveira, Roberto C. L. and Soares, F{\´a}bio Mendes and Juiz, Carlos and Bermejo, Belen and M{\"u}hle, Alexander and Gr{\"u}ner, Andreas and Saxena, Vageesh and Gayvoronskaya, Tatiana and Weyand, Christopher and Krause, Mirko and Frank, Markus and Bischoff, Sebastian and Behrens, Freya and R{\"u}ckin, Julius and Ziegler, Adrian and Vogel, Thomas and Tran, Chinh and Moser, Irene and Grunske, Lars and Sz{\´a}rnyas, G{\´a}bor and Marton, J{\´o}zsef and Maginecz, J{\´a}nos and Varr{\´o}, D{\´a}niel and Antal, J{\´a}nos Benjamin}, title = {HPI Future SOC Lab - Proceedings 2018}, number = {151}, editor = {Meinel, Christoph and Polze, Andreas and Beins, Karsten and Strotmann, Rolf and Seibold, Ulrich and R{\"o}dszus, Kurt and M{\"u}ller, J{\"u}rgen}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-547-7}, issn = {1613-5652}, doi = {10.25932/publishup-56371}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-563712}, publisher = {Universit{\"a}t Potsdam}, pages = {x, 277}, year = {2022}, abstract = {The "HPI Future SOC Lab" is a cooperation of the Hasso Plattner Institute (HPI) and industry partners. Its mission is to enable and promote exchange and interaction between the research community and the industry partners. The HPI Future SOC Lab provides researchers with free-of-charge access to a complete infrastructure of state-of-the-art hardware and software. This infrastructure includes components which might be too expensive for an ordinary research environment, such as servers with up to 64 cores and 2 TB main memory. The offerings address researchers particularly from, but not limited to, the areas of computer science and business information systems. Main areas of research include cloud computing, parallelization, and In-Memory technologies. This technical report presents results of research projects executed in 2018.
Selected projects have presented their results on April 17th and November 14th 2018 at the Future SOC Lab Day events.}, language = {en} } @article{AlnoorTiberiusAtiyahetal.2022, author = {Alnoor, Alhamzah and Tiberius, Victor and Atiyah, Abbas Gatea and Khaw, Khai Wah and Yin, Teh Sin and Chew, XinYing and Abbas, Sammar}, title = {How positive and negative electronic word of mouth (eWOM) affects customers' intention to use social commerce?}, series = {International journal of human computer interaction}, journal = {International journal of human computer interaction}, publisher = {Taylor \& Francis}, address = {New York}, issn = {1044-7318}, doi = {10.1080/10447318.2022.2125610}, pages = {1 -- 30}, year = {2022}, abstract = {Advances in Web 2.0 technologies have led to the widespread assimilation of electronic commerce platforms as an innovative shopping method and an alternative to traditional shopping. However, due to pro-technology bias, scholars focus more on adopting technology, and slightly less attention has been given to the impact of electronic word of mouth (eWOM) on customers' intention to use social commerce. This study addresses the gap by exploring the effect of eWOM on males' and females' intentions and by identifying the mediation of perceived crowding. To this end, we adopted a dual-stage multi-group structural equation modeling and artificial neural network (SEM-ANN) approach. We successfully extended the eWOM concept by integrating negative and positive factors and perceived crowding. The results reveal the causal and non-compensatory relationships between the constructs. The variables supported by the SEM analysis are adopted as the ANN model's input neurons. According to the natural significance obtained from the ANN approach, males' intentions to accept social commerce are related mainly to helping the company, followed by core functionalities. In contrast, females are highly influenced by technical aspects and mishandling. The ANN model predicts customers' intentions to use social commerce with an accuracy of 97\%. Based on our findings, we discuss the theoretical and practical implications for increasing customers' intention to use social commerce channels.}, language = {en} } @article{BonifatiMiorNaumannetal.2022, author = {Bonifati, Angela and Mior, Michael J. and Naumann, Felix and Noack, Nele Sina}, title = {How inclusive are we?}, series = {SIGMOD record / Association for Computing Machinery, Special Interest Group on Management of Data}, volume = {50}, journal = {SIGMOD record / Association for Computing Machinery, Special Interest Group on Management of Data}, number = {4}, publisher = {Association for Computing Machinery}, address = {New York}, issn = {0163-5808}, doi = {10.1145/3516431.3516438}, pages = {30 -- 35}, year = {2022}, abstract = {ACM SIGMOD, VLDB and other database organizations have committed to fostering an inclusive and diverse community, as do many other scientific organizations. Recently, different measures have been taken to advance these goals, especially for underrepresented groups. One possible measure is double-blind reviewing, which aims to hide gender, ethnicity, and other properties of the authors.
We report the preliminary results of a gender diversity analysis of publications of the database community across several peer-reviewed venues, and also compare women's authorship percentages in both single-blind and double-blind venues over the years. We also cross-compare the results obtained in data management with those of other relevant areas in Computer Science.}, language = {en} } @article{MattisBeckmannReinetal.2022, author = {Mattis, Toni and Beckmann, Tom and Rein, Patrick and Hirschfeld, Robert}, title = {First-class concepts}, series = {Journal of object technology : JOT / ETH Z{\"u}rich, Department of Computer Science}, volume = {21}, journal = {Journal of object technology : JOT / ETH Z{\"u}rich, Department of Computer Science}, number = {2}, publisher = {ETH Z{\"u}rich, Department of Computer Science}, address = {Z{\"u}rich}, issn = {1660-1769}, doi = {10.5381/jot.2022.21.2.a6}, pages = {1 -- 15}, year = {2022}, abstract = {Ideally, programs are partitioned into independently maintainable and understandable modules. As a system grows, its architecture gradually loses the capability to accommodate new concepts in a modular way. While refactoring is expensive and not always possible, and the programming language might lack dedicated primary language constructs to express certain cross-cutting concerns, programmers are still able to explain and delineate convoluted concepts through secondary means: code comments, use of whitespace and arrangement of code, documentation, or communicating tacit knowledge.
Secondary constructs are easy to change and provide high flexibility in communicating cross-cutting concerns and other concepts among programmers. However, such secondary constructs usually have no reified representation that can be explored and manipulated as first-class entities through the programming environment.
In this exploratory work, we discuss novel ways to express a wide range of concepts, including cross-cutting concerns, patterns, and lifecycle artifacts, independently of the dominant decomposition imposed by an existing architecture. We propose the representation of concepts as first-class objects inside the programming environment that retain the capability to change as easily as code comments. We explore new tools that allow programmers to view, navigate, and change programs based on conceptual perspectives. In a small case study, we demonstrate how such views can be created and how the programming experience changes from draining programmers' attention by stretching it across multiple modules toward focusing it on cohesively presented concepts. Our designs are geared toward enabling multiple secondary perspectives on a system to co-exist in symbiosis with the original architecture, hence making it easier to explore, understand, and explain complex contexts and narratives that are hard or impossible to express using primary modularity constructs.}, language = {en} } @book{EichenrothReinHirschfeld2022, author = {Eichenroth, Friedrich and Rein, Patrick and Hirschfeld, Robert}, title = {Fast packrat parsing in a live programming environment}, series = {Technische Berichte des Hasso-Plattner-Instituts f{\"u}r Digital Engineering an der Universit{\"a}t Potsdam}, journal = {Technische Berichte des Hasso-Plattner-Instituts f{\"u}r Digital Engineering an der Universit{\"a}t Potsdam}, number = {135}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-503-3}, issn = {1613-5652}, doi = {10.25932/publishup-49124}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-491242}, publisher = {Universit{\"a}t Potsdam}, pages = {79}, year = {2022}, abstract = {Language developers who design domain-specific languages or new language features need a way to make fast changes to language definitions. Those fast changes require immediate feedback. Also, it should be possible to parse the developed languages quickly to handle extensive sets of code. Parsing expression grammars provide an easy-to-understand method for language definitions. Packrat parsing is a method to parse grammars of this kind, but this method is unable to handle left-recursion properly. Existing solutions either partially rewrite left-recursive rules and forbid the remaining ones, or use complex extensions to packrat parsing that are hard to understand and cost-intensive. We investigated methods to make parsing as fast as possible, using easy-to-follow algorithms while not losing the ability to make fast changes to grammars. We focused our efforts on two approaches. One is to start from an existing technique for limited left-recursion rewriting and enhance it to work for general left-recursive grammars. The second approach is to design a grammar compilation process to find left-recursion before parsing, and in this way, reduce computational costs wherever possible and generate ready-to-use parser classes. Rewriting parsing expression grammars is a task that, if done in a general way, unveils a large number of cases such that any rewriting algorithm surpasses the complexity of other left-recursive parsing algorithms. Lookahead operators introduce this complexity. However, most languages have only small portions that are left-recursive and, in virtually all cases, have no indirect or hidden left-recursion.
This means that distinguishing the left-recursive parts of grammars from non-left-recursive components holds great improvement potential for existing parsers. In this report, we list all the required steps for grammar rewriting to handle left-recursion, including grammar analysis, grammar rewriting itself, and syntax tree restructuring. Also, we describe the implementation of a parsing expression grammar framework in Squeak/Smalltalk and the possible interactions with the already existing parser Ohm/S. We quantitatively benchmarked this framework, directing our focus on parsing time and the ability to use it in a live programming context. Compared with Ohm, we achieved massive parsing time improvements while preserving the ability to use our parser as a live programming tool. The work is essential because, for one, we outlined the difficulties and complexity that come with grammar rewriting. Also, we removed the existing limitations of left-recursion by eliminating it before parsing.}, language = {en} } @phdthesis{Niephaus2022, author = {Niephaus, Fabio}, title = {Exploratory tool-building platforms for polyglot virtual machines}, doi = {10.25932/publishup-57177}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-571776}, school = {Universit{\"a}t Potsdam}, pages = {xxi, 249}, year = {2022}, abstract = {Polyglot programming allows developers to use multiple programming languages within the same software project. While it is common to use more than one language in certain programming domains, developers also apply polyglot programming for other purposes such as to re-use software written in other languages. Although established approaches to polyglot programming come with significant limitations, for example, in terms of performance and tool support, developers still use them to be able to combine languages. Polyglot virtual machines (VMs) such as GraalVM provide a new level of polyglot programming, allowing languages to directly interact with each other. This reduces the amount of glue code needed to combine languages, results in better performance, and enables tools such as debuggers to work across languages. However, only little research has focused on novel tools that are designed to support developers in building software with polyglot VMs. One reason is that tool-building is often an expensive activity; another is that polyglot VMs are still a moving target as their use cases and requirements are not yet well understood. In this thesis, we present an approach that builds on existing self-sustaining programming systems such as Squeak/Smalltalk to enable exploratory programming, a practice for exploring and gathering software requirements, and re-use their extensive tool-building capabilities in the context of polyglot VMs. Based on TruffleSqueak, our implementation for the GraalVM, we further present five case studies that demonstrate how our approach helps tool developers to design and build tools for polyglot programming. We further show that TruffleSqueak can also be used by application developers to build and evolve polyglot applications at run-time and by language and runtime developers to understand the dynamic behavior of GraalVM languages and internals. Since our platform allows all these developers to apply polyglot programming, it can further help to better understand the advantages, use cases, requirements, and challenges of polyglot VMs.
Moreover, we demonstrate that our approach can also be applied to other polyglot VMs and that insights gained through it are transferable to other programming systems. We conclude that our research on tools for polyglot programming is an important step toward making polyglot VMs more approachable for developers in practice. With good tool support, we believe polyglot VMs can make it much more common for developers to take advantage of multiple languages and their ecosystems when building software.}, language = {en} } @article{BlaesiusFriedrichLischeidetal.2022, author = {Bl{\"a}sius, Thomas and Friedrich, Tobias and Lischeid, Julius and Meeks, Kitty and Schirneck, Friedrich Martin}, title = {Efficiently enumerating hitting sets of hypergraphs arising in data profiling}, series = {Journal of computer and system sciences : JCSS}, volume = {124}, journal = {Journal of computer and system sciences : JCSS}, publisher = {Elsevier}, address = {San Diego}, issn = {0022-0000}, doi = {10.1016/j.jcss.2021.10.002}, pages = {192 -- 213}, year = {2022}, abstract = {The transversal hypergraph problem asks to enumerate the minimal hitting sets of a hypergraph. If the solutions have bounded size, Eiter and Gottlob [SICOMP'95] gave an algorithm running in output-polynomial time, but whose space requirement also scales with the output. We improve this to polynomial delay and space. Central to our approach is the extension problem, deciding for a set X of vertices whether it is contained in any minimal hitting set. We show that this is one of the first natural problems to be W[3]-complete. We give an algorithm for the extension problem running in time O(m^{|X|+1} n) and prove a SETH lower bound showing that this is close to optimal. We apply our enumeration method to the discovery problem of minimal unique column combinations from data profiling. Our empirical evaluation suggests that the algorithm outperforms its worst-case guarantees on hypergraphs stemming from real-world databases.}, language = {en} } @phdthesis{Draisbach2022, author = {Draisbach, Uwe}, title = {Efficient duplicate detection and the impact of transitivity}, doi = {10.25932/publishup-57214}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-572140}, school = {Universit{\"a}t Potsdam}, pages = {x, 150}, year = {2022}, abstract = {Duplicate detection describes the process of finding multiple representations of the same real-world entity in the absence of a unique identifier, and has many application areas, such as customer relationship management, genealogy and social sciences, or online shopping. The increasing amount of data in recent years has, on the one hand, made the problem even more challenging and has, on the other hand, led to a renaissance in duplicate detection research. This thesis examines the effects and opportunities of transitive relationships on the duplicate detection process. Transitivity implies that if record pairs ⟨ri,rj⟩ and ⟨rj,rk⟩ are classified as duplicates, then record pair ⟨ri,rk⟩ also has to be a duplicate. However, this reasoning might contradict the pairwise classification, which is usually based on the similarity of objects. An essential property of similarity, in contrast to equivalence, is that similarity is not necessarily transitive. First, we experimentally evaluate the effect of an increasing data volume on the threshold selection to classify whether a record pair is a duplicate or non-duplicate.
Our experiments show that, independently of the pair selection algorithm and the similarity measure used, selecting a suitable threshold becomes more difficult with an increasing number of records due to an increased probability of adding a false duplicate to an existing cluster. Thus, the best threshold changes with the dataset size, and a good threshold for a small (possibly sampled) dataset is not necessarily a good threshold for a larger (possibly complete) dataset. As data grows over time, earlier selected thresholds are no longer a suitable choice, and the problem becomes worse for datasets with larger clusters. Second, with the Duplicate Count Strategy (DCS) and its enhancement DCS++, we present two alternatives to the standard Sorted Neighborhood Method (SNM) for the selection of candidate record pairs. DCS adapts SNM's window size based on the number of detected duplicates, and DCS++ uses transitive dependencies to save complex comparisons for finding duplicates in larger clusters. We prove that with a proper (domain- and data-independent!) threshold, DCS++ is more efficient than SNM without loss of effectiveness. Third, we tackle the problem of contradicting pairwise classifications. Usually, the transitive closure is used for pairwise classifications to obtain a transitively closed result set. However, the transitive closure disregards negative classifications. We present three new and several existing clustering algorithms and experimentally evaluate them on various datasets and under various algorithm configurations. The results show that the commonly used transitive closure is inferior to most other clustering algorithms, especially for the precision of results. In scenarios with larger clusters, our proposed EMCC algorithm is, together with Markov Clustering, the best-performing clustering approach for duplicate detection, although its runtime is longer than that of Markov Clustering due to its subexponential time complexity. EMCC especially outperforms Markov Clustering regarding the precision of the results and additionally has the advantage that it can also be used in scenarios where edge weights are not available.}, language = {en} } @article{SchmidlPapenbrock2022, author = {Schmidl, Sebastian and Papenbrock, Thorsten}, title = {Efficient distributed discovery of bidirectional order dependencies}, series = {The VLDB journal}, volume = {31}, journal = {The VLDB journal}, number = {1}, publisher = {Springer}, address = {Berlin ; Heidelberg ; New York}, issn = {1066-8888}, doi = {10.1007/s00778-021-00683-4}, pages = {49 -- 74}, year = {2022}, abstract = {Bidirectional order dependencies (bODs) capture order relationships between lists of attributes in a relational table. They can express that, for example, sorting books by publication date in ascending order also sorts them by age in descending order. The knowledge about order relationships is useful for many data management tasks, such as query optimization, data cleaning, or consistency checking. Because the bODs of a specific dataset are usually not explicitly given, they need to be discovered. The discovery of all minimal bODs (in set-based canonical form) is a task with exponential complexity in the number of attributes, though, which is why existing bOD discovery algorithms cannot process datasets of practically relevant size in a reasonable time. In this paper, we propose the distributed bOD discovery algorithm DISTOD, whose execution time scales with the available hardware.
DISTOD is a scalable, robust, and elastic bOD discovery approach that combines efficient pruning techniques for bOD candidates in set-based canonical form with a novel, reactive, and distributed search strategy. Our evaluation on various datasets shows that DISTOD outperforms both single-threaded and distributed state-of-the-art bOD discovery algorithms by up to orders of magnitude; it can, in particular, process much larger datasets.}, language = {en} } @article{NdashimyeHebieTjaden2022, author = {Ndashimye, Felix and Hebie, Oumarou and Tjaden, Jasper}, title = {Effectiveness of WhatsApp for measuring migration in follow-up phone surveys}, series = {Social science computer review}, journal = {Social science computer review}, publisher = {Sage}, address = {Thousand Oaks}, issn = {0894-4393}, doi = {10.1177/08944393221111340}, pages = {20}, year = {2022}, abstract = {Phone surveys have increasingly become important data collection tools in developing countries, particularly in the context of sudden contact restrictions due to the COVID-19 pandemic. So far, there is limited evidence regarding the potential of the messenger service WhatsApp for remote data collection despite its large global coverage and expanding membership. WhatsApp may offer advantages in terms of reducing panel attrition and cutting survey costs. WhatsApp may offer additional benefits to migration scholars interested in cross-border migration behavior, which is notoriously difficult to measure using conventional face-to-face surveys. In this field experiment, we compared the response rates between WhatsApp and interactive voice response (IVR) modes using a sample of 8446 contacts in Senegal and Guinea. At 12\%, WhatsApp survey response rates were nearly eight percentage points lower than IVR survey response rates. However, WhatsApp offers higher survey completion rates, substantially lower costs and does not introduce more sample selection bias compared to IVR. We discuss the potential of WhatsApp surveys in low-income contexts and provide practical recommendations for field implementation.}, language = {en} } @phdthesis{Jiang2022, author = {Jiang, Lan}, title = {Discovering metadata in data files}, doi = {10.25932/publishup-56620}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-566204}, school = {Universit{\"a}t Potsdam}, pages = {x, ii, 117}, year = {2022}, abstract = {It is estimated that data scientists spend up to 80\% of their time exploring, cleaning, and transforming their data. A major reason for that expenditure is the lack of knowledge about the data used, which are often from different sources and have heterogeneous structures. As a means to describe various properties of data, metadata can help data scientists understand and prepare their data, saving time for innovative and valuable data analytics. However, metadata do not always exist: some data file formats are not capable of storing them; metadata were deleted for privacy concerns; legacy data may have been produced by systems that were not designed to store and handle metadata. As data are being produced at an unprecedentedly fast pace and stored in diverse formats, manually creating metadata is not only impractical but also error-prone, demanding automatic approaches for metadata detection. In this thesis, we focus on detecting metadata in CSV files - a type of plain-text file that, similar to spreadsheets, may contain different types of content at arbitrary positions.
We propose a taxonomy of metadata in CSV files and specifically address the discovery of three different types of metadata: line and cell type, aggregations, and primary keys and foreign keys. Data are organized in an ad hoc manner in CSV files and do not follow the fixed structure assumed by common data processing tools. Detecting the structure of such files is a prerequisite for extracting information from them, which can be addressed by detecting the semantic type, such as header, data, derived, or footnote, of each line or each cell. We propose the supervised-learning approach Strudel to detect the type of lines and cells. CSV files may also include aggregations. An aggregation represents the arithmetic relationship between a numeric cell and a set of other numeric cells. Our proposed AggreCol algorithm is capable of detecting aggregations of five arithmetic functions in CSV files. Note that stylistic features, such as font style and cell background color, do not exist in CSV files. Our proposed algorithms address the respective problems by using only content, contextual, and computational features. Storing a relational table is also a common use of CSV files. Primary keys and foreign keys are important metadata for relational databases, which are usually not present for database instances dumped as plain-text files. We propose the HoPF algorithm to holistically detect both constraints in relational databases. Our approach is capable of distinguishing true primary and foreign keys from a great amount of spurious unique column combinations and inclusion dependencies, which can be detected by state-of-the-art data profiling algorithms.}, language = {en} } @book{MeinelJohnWollowski2022, author = {Meinel, Christoph and John, Catrina and Wollowski, Tobias}, title = {Die HPI Schul-Cloud - Von der Vision zur digitalen Infrastruktur f{\"u}r deutsche Schulen}, number = {144}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-526-2}, issn = {1613-5652}, doi = {10.25932/publishup-53586}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-535860}, publisher = {Universit{\"a}t Potsdam}, pages = {v, 77}, year = {2022}, abstract = {Digital media have become an integral part of our everyday lives. One of the most central areas of our society, school education, must not be left behind. Wherever the use of digitally supported tools makes pedagogical sense, it must be possible to enable it within a secure framework. The HPI Schul-Cloud has pursued this vision, which was initiated by the 2016 National IT Summit and prefaces this report. Over the past five years, it has evolved from a pilot project into an indispensable IT infrastructure for numerous schools. During the corona pandemic, it provided important support to many thousands of schools in fulfilling their educational mission. It has thus more than achieved its goal of providing a future-proof and privacy-compliant infrastructure for the digital support of teaching.
Currently, around 1.4 million teachers and students across Germany and at German schools abroad use the HPI Schul-Cloud.}, language = {de} } @book{GerkenUebernickeldePaula2022, author = {Gerken, Stefanie and Uebernickel, Falk and de Paula, Danielly}, title = {Design Thinking: a Global Study on Implementation Practices in Organizations}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-525-5}, doi = {10.25932/publishup-53466}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-534668}, publisher = {Universit{\"a}t Potsdam}, pages = {230}, year = {2022}, abstract = {These days design thinking is no longer a "new approach". Among practitioners, as well as academics, interest in the topic has gathered pace over the last two decades. However, opinions are divided over the longevity of the phenomenon: whether design thinking is merely "old wine in new bottles," a passing trend, or still evolving as it is being spread to an increasing number of organizations and industries. Despite its growing relevance and the diffusion of design thinking, knowledge on the actual status quo in organizations remains scarce. With a new study, the research team of Prof. Uebernickel and Stefanie Gerken investigates temporal developments and changes in design thinking practices in organizations over the past six years, comparing the results of the 2015 "Parts without a whole" study with current practices and future developments. Companies of all sizes and from different parts of the world participated in the survey. The findings from qualitative interviews with experts, i.e., people who have years of experience with design thinking, were cross-checked with the results from an exploratory analysis of the survey data. This analysis uncovers significant variances and similarities in how design thinking is interpreted and applied in businesses.}, language = {en} } @misc{UllrichVladovaEigelshovenetal.2022, author = {Ullrich, Andr{\´e} and Vladova, Gergana and Eigelshoven, Felix and Renz, Andr{\´e}}, title = {Data mining of scientific research on artificial intelligence in teaching and administration in higher education institutions}, series = {Zweitver{\"o}ffentlichungen der Universit{\"a}t Potsdam : Wirtschafts- und Sozialwissenschaftliche Reihe}, journal = {Zweitver{\"o}ffentlichungen der Universit{\"a}t Potsdam : Wirtschafts- und Sozialwissenschaftliche Reihe}, number = {160}, issn = {1867-5808}, doi = {10.25932/publishup-58907}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-589077}, pages = {18}, year = {2022}, abstract = {Teaching and learning as well as administrative processes are still experiencing intensive changes with the rise of artificial intelligence (AI) technologies and their diverse application opportunities in the context of higher education. As a result, scientific interest in the topic in general, but also in specific focal points, has risen as well. However, there is no structured overview of AI in teaching and administration processes in higher education institutions that allows identifying major research topics and trends, concretizing peculiarities, and developing recommendations for further action. To overcome this gap, this study seeks to systematize the current scientific discourse on AI in teaching and administration in higher education institutions.
This study identified (1) an imbalance in research on AI in educational and administrative contexts, (2) an imbalance in disciplines and a lack of interdisciplinary research, (3) inequalities in cross-national research activities, as well as (4) neglected research topics and paths. In this way, a comparative analysis between AI usage in administration and teaching and learning processes, a systematization of the state of research, an identification of research gaps, as well as further research paths on AI in higher education institutions are contributed to research.}, language = {en} } @article{UllrichVladovaEigelshovenetal.2022, author = {Ullrich, Andr{\´e} and Vladova, Gergana and Eigelshoven, Felix and Renz, Andr{\´e}}, title = {Data mining of scientific research on artificial intelligence in teaching and administration in higher education institutions}, series = {Discover artificial intelligence}, volume = {2}, journal = {Discover artificial intelligence}, publisher = {Springer}, address = {Cham}, issn = {2731-0809}, doi = {10.1007/s44163-022-00031-7}, pages = {18}, year = {2022}, abstract = {Teaching and learning as well as administrative processes are still experiencing intensive changes with the rise of artificial intelligence (AI) technologies and their diverse application opportunities in the context of higher education. As a result, scientific interest in the topic in general, but also in specific focal points, has risen as well. However, there is no structured overview of AI in teaching and administration processes in higher education institutions that allows identifying major research topics and trends, concretizing peculiarities, and developing recommendations for further action. To overcome this gap, this study seeks to systematize the current scientific discourse on AI in teaching and administration in higher education institutions. This study identified (1) an imbalance in research on AI in educational and administrative contexts, (2) an imbalance in disciplines and a lack of interdisciplinary research, (3) inequalities in cross-national research activities, as well as (4) neglected research topics and paths. In this way, a comparative analysis between AI usage in administration and teaching and learning processes, a systematization of the state of research, an identification of research gaps, as well as further research paths on AI in higher education institutions are contributed to research.}, language = {en} } @article{vonSteinauSteinrueckKurth2022, author = {von Steinau-Steinr{\"u}ck, Robert and Kurth, Paula Sophie}, title = {Das reformierte Statusfeststellungsverfahren in der Praxis}, series = {NJW spezial}, volume = {19}, journal = {NJW spezial}, number = {24}, publisher = {C.H. Beck}, address = {M{\"u}nchen}, issn = {1613-4621}, pages = {754 -- 755}, year = {2022}, abstract = {Upon application to the solely competent Deutsche Rentenversicherung Bund, the status determination procedure provides a binding assessment of the often complicated and consequential distinction between self-employment and dependent employment. As of 1 April 2022, the status determination procedure was comprehensively reformed.
In practice, the amendments introduced by the reform have so far proven themselves to varying degrees.}, language = {de} } @inproceedings{HagemannAbramova2022, author = {Hagemann, Linus and Abramova, Olga}, title = {Crafting audience engagement in social media conversations}, series = {Proceedings of the 55th Hawaii International Conference on System Sciences}, booktitle = {Proceedings of the 55th Hawaii International Conference on System Sciences}, publisher = {HICSS Conference Office University of Hawaii at Manoa}, address = {Honolulu}, isbn = {978-0-9981331-5-7}, pages = {3222 -- 3231}, year = {2022}, abstract = {Observing inconsistent results in prior studies, this paper applies the elaboration likelihood model to investigate the impact of affective and cognitive cues embedded in social media messages on audience engagement during a political event. Leveraging a rich dataset in the context of the 2020 U.S. presidential elections containing more than 3 million tweets, we found both cue types to be prominent. For the overall sample, positivity and sentiment are negatively related to engagement. In contrast, the post-hoc sub-sample analysis of tweets from famous users shows that emotionally charged content is more engaging. The role of sentiment decreases when the number of followers grows and ultimately becomes insignificant for Twitter participants with a vast number of followers. Prosocial orientation ("we-talk") is consistently associated with more likes, comments, and retweets in the overall sample and sub-samples.}, language = {en} }