@article{VitaglianoHameedJiangetal.2023,
  author = {Vitagliano, Gerardo and Hameed, Mazhar and Jiang, Lan and Reisener, Lucas and Wu, Eugene and Naumann, Felix},
  title = {Pollock: a data loading benchmark},
  journal = {Proceedings of the VLDB Endowment},
  volume = {16},
  number = {8},
  publisher = {Association for Computing Machinery},
  address = {New York},
  issn = {2150-8097},
  doi = {10.14778/3594512.3594518},
  pages = {1870--1882},
  year = {2023},
  abstract = {Any system at play in a data-driven project has a fundamental requirement: the ability to load data. The de-facto standard format to distribute and consume raw data is CSV. Yet, the plain text and flexible nature of this format make such files often difficult to parse and correctly load their content, requiring cumbersome data preparation steps. We propose a benchmark to assess the robustness of systems in loading data from non-standard CSV formats and with structural inconsistencies. First, we formalize a model to describe the issues that affect real-world files and use it to derive a systematic ``pollution'' process to generate dialects for any given grammar. Our benchmark leverages the pollution framework for the csv format. To guide pollution, we have surveyed thousands of real-world, publicly available csv files, recording the problems we encountered.
We demonstrate the applicability of our benchmark by testing and scoring 16 different systems: popular csv parsing frameworks, relational database tools, spreadsheet systems, and a data visualization tool.},
  language = {en}
}

@inproceedings{MarxBruenkerMirbabaieetal.2024,
  author = {Marx, Julian and Br{\"u}nker, Felix and Mirbabaie, Milad and Stieglitz, Stefan},
  title = {Digital activism on social media},
  booktitle = {Proceedings of the 57th Annual Hawaii International Conference on System Sciences},
  editor = {Bui, Tung X.},
  publisher = {Department of IT Management Shidler College of Business University of Hawaii},
  address = {Honolulu, HI},
  isbn = {978-0-99813-317-1},
  pages = {7205--7214},
  year = {2024},
  abstract = {Social media constitute an important arena for public debates and steady interchange of issues relevant to society. To boost their reputation, commercial organizations also engage in political, social, or environmental debates on social media. To engage in this type of digital activism, organizations increasingly utilize the social media profiles of executive employees and other brand ambassadors. However, the relationship between brand ambassadors' digital activism and corporate reputation is only vaguely understood. The results of a qualitative inquiry suggest that digital activism via brand ambassadors can be risky (e.g., creating additional surface for firestorms, financial loss) and rewarding (e.g., emitting authenticity, employing `megaphones' for industry change) at the same time.
The paper informs both scholarship and practitioners about strategic trade-offs that need to be considered when employing brand ambassadors for digital activism.},
  language = {en}
}

@inproceedings{MirbabaieRieskampHofeditzetal.2024,
  author = {Mirbabaie, Milad and Rieskamp, Jonas and Hofeditz, Lennart and Stieglitz, Stefan},
  title = {Breaking down barriers},
  booktitle = {Proceedings of the 57th Annual Hawaii International Conference on System Sciences},
  editor = {Bui, Tung X.},
  publisher = {Department of IT Management Shidler College of Business University of Hawaii},
  address = {Honolulu, HI},
  isbn = {978-0-99813-317-1},
  pages = {672--681},
  year = {2024},
  abstract = {Many researchers hesitate to provide full access to their datasets due to a lack of knowledge about research data management (RDM) tools and perceived fears, such as losing the value of one's own data. Existing tools and approaches often do not take into account these fears and missing knowledge. In this study, we examined how conversational agents (CAs) can provide a natural way of guidance through RDM processes and nudge researchers towards more data sharing. This work offers an online experiment in which researchers interacted with a CA on a self-developed RDM platform and a survey on participants' data sharing behavior. Our findings indicate that the presence of a guiding and enlightening CA on an RDM platform has a constructive influence on both the intention to share data and the actual behavior of data sharing.
Notably, individual factors do not appear to impede or hinder this effect.},
  language = {en}
}

@article{NguyenGeorgieKayhanetal.2021,
  author = {Nguyen, Dong Hai Phuong and Georgie, Yasmin Kim and Kayhan, Ezgi and Eppe, Manfred and Hafner, Verena Vanessa and Wermter, Stefan},
  title = {Sensorimotor representation learning for an ``active self'' in robots},
  journal = {K{\"u}nstliche Intelligenz : KI ; Forschung, Entwicklung, Erfahrungen ; Organ des Fachbereichs 1 K{\"u}nstliche Intelligenz der Gesellschaft f{\"u}r Informatik e.V., GI / Fachbereich 1 der Gesellschaft f{\"u}r Informatik e.V},
  volume = {35},
  number = {1},
  publisher = {Springer},
  address = {Berlin},
  issn = {0933-1875},
  doi = {10.1007/s13218-021-00703-z},
  pages = {9--35},
  year = {2021},
  abstract = {Safe human-robot interactions require robots to be able to learn how to behave appropriately in spaces populated by people and thus to cope with the challenges posed by our dynamic and unstructured environment, rather than being provided a rigid set of rules for operations. In humans, these capabilities are thought to be related to our ability to perceive our body in space, sensing the location of our limbs during movement, being aware of other objects and agents, and controlling our body parts to interact with them intentionally. Toward the next generation of robots with bio-inspired capacities, in this paper, we first review the developmental processes of underlying mechanisms of these abilities: The sensory representations of body schema, peripersonal space, and the active self in humans. Second, we provide a survey of robotics models of these sensory representations and robotics models of the self; and we compare these models with the human counterparts.
Finally, we analyze what is missing from these robotics models and propose a theoretical computational framework, which aims to allow the emergence of the sense of self in artificial agents by developing sensory representations through self-exploration.},
  language = {en}
}

@phdthesis{Huegle2024,
  author = {Huegle, Johannes},
  title = {Causal discovery in practice: Non-parametric conditional independence testing and tooling for causal discovery},
  school = {Universit{\"a}t Potsdam},
  doi = {10.25932/publishup-63582},
  url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-635820},
  pages = {xiv, 156},
  year = {2024},
  abstract = {Knowledge about causal structures is crucial for decision support in various domains. For example, in discrete manufacturing, identifying the root causes of failures and quality deviations that interrupt the highly automated production process requires causal structural knowledge. However, in practice, root cause analysis is usually built upon individual expert knowledge about associative relationships. But, ``correlation does not imply causation'', and misinterpreting associations often leads to incorrect conclusions. Recent developments in methods for causal discovery from observational data have opened the opportunity for a data-driven examination. Despite its potential for data-driven decision support, omnipresent challenges impede causal discovery in real-world scenarios. In this thesis, we make a threefold contribution to improving causal discovery in practice. (1) The growing interest in causal discovery has led to a broad spectrum of methods with specific assumptions on the data and various implementations. Hence, application in practice requires careful consideration of existing methods, which becomes laborious when dealing with various parameters, assumptions, and implementations in different programming languages.
Additionally, evaluation is challenging due to the lack of ground truth in practice and limited benchmark data that reflect real-world data characteristics. To address these issues, we present a platform-independent modular pipeline for causal discovery and a ground truth framework for synthetic data generation that provides comprehensive evaluation opportunities, e.g., to examine the accuracy of causal discovery methods in case of inappropriate assumptions. (2) Applying constraint-based methods for causal discovery requires selecting a conditional independence (CI) test, which is particularly challenging in mixed discrete-continuous data omnipresent in many real-world scenarios. In this context, inappropriate assumptions on the data or the commonly applied discretization of continuous variables reduce the accuracy of CI decisions, leading to incorrect causal structures. Therefore, we contribute a non-parametric CI test leveraging k-nearest neighbors methods and prove its statistical validity and power in mixed discrete-continuous data, as well as the asymptotic consistency when used in constraint-based causal discovery. An extensive evaluation of synthetic and real-world data shows that the proposed CI test outperforms state-of-the-art approaches in the accuracy of CI testing and causal discovery, particularly in settings with low sample sizes. (3) To show the applicability and opportunities of causal discovery in practice, we examine our contributions in real-world discrete manufacturing use cases.
For example, we showcase how causal structural knowledge helps to understand unforeseen production downtimes or adds decision support in case of failures and quality deviations in automotive body shop assembly lines.},
  language = {en}
}

% NOTE(review): the export listed `publisher` twice for this book; the first value
% (the university press) is kept as `publisher`, the second (the university) is kept
% as `institution` -- confirm the intended field.
% NOTE(review): the surname `Coelh` may be truncated -- verify against the publication.
@book{KubanRottaNolteetal.2023,
  author = {Kuban, Robert and Rotta, Randolf and Nolte, J{\"o}rg and Chromik, Jonas
    and Beilharz, Jossekin Jakob and Pirl, Lukas and Friedrich, Tobias and Lenzner, Pascal
    and Weyand, Christopher and Juiz, Carlos and Bermejo, Belen and Sauer, Joao
    and Coelh, Leandro dos Santos and Najafi, Pejman and P{\"u}nter, Wenzel and Cheng, Feng
    and Meinel, Christoph and Sidorova, Julia and Lundberg, Lars and Vogel, Thomas
    and Tran, Chinh and Moser, Irene and Grunske, Lars and Elsaid, Mohamed Esameldin Mohamed
    and Abbas, Hazem M. and Rula, Anisa and Sejdiu, Gezim and Maurino, Andrea
    and Schmidt, Christopher and H{\"u}gle, Johannes and Uflacker, Matthias and Nozza, Debora
    and Messina, Enza and van Hoorn, Andr{\'e} and Frank, Markus and Schulz, Henning
    and Alhosseini Almodarresi Yasin, Seyed Ali and Nowicki, Marek and Muite, Benson K.
    and Boysan, Mehmet Can and Bianchi, Federico and Cremaschi, Marco and Moussa, Rim
    and Abdel-Karim, Benjamin M.
    and Pfeuffer, Nicolas and Hinz, Oliver and Plauth, Max and Polze, Andreas and Huo, Da
    and de Melo, Gerard and Mendes Soares, F{\'a}bio
    and de Oliveira, Roberto C{\'e}lio Lim{\~a}o and Benson, Lawrence and Paul, Fabian
    and Werling, Christian and Windheuser, Fabian and Stojanovic, Dragan and Djordjevic, Igor
    and Stojanovic, Natalija and Stojnev Ilic, Aleksandra and Weidmann, Vera and Lowitzki, Leon
    and Wagner, Markus and Ifa, Abdessatar Ben and Arlos, Patrik and Megia, Ana
    and Vendrell, Joan and Pfitzner, Bjarne and Redondo, Alberto and R{\'i}os Insua, David
    and Albert, Justin Amadeus and Zhou, Lin and Arnrich, Bert and Szab{\'o}, Ildik{\'o}
    and Fodor, Szabina and Ternai, Katalin and Bhowmik, Rajarshi and Campero Durand, Gabriel
    and Shevchenko, Pavlo and Malysheva, Milena and Prymak, Ivan and Saake, Gunter},
  title = {HPI Future SOC Lab - Proceedings 2019},
  number = {158},
  editor = {Meinel, Christoph and Polze, Andreas and Beins, Karsten and Strotmann, Rolf and Seibold, Ulrich and R{\"o}dszus, Kurt and M{\"u}ller, J{\"u}rgen},
  publisher = {Universit{\"a}tsverlag Potsdam},
  address = {Potsdam},
  institution = {Universit{\"a}t Potsdam},
  isbn = {978-3-86956-564-4},
  issn = {1613-5652},
  doi = {10.25932/publishup-59791},
  url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-597915},
  pages = {xi, 301},
  year = {2023},
  abstract = {The ``HPI Future SOC Lab'' is a cooperation of the Hasso Plattner Institute (HPI) and industry partners. Its mission is to enable and promote exchange and interaction between the research community and the industry partners. The HPI Future SOC Lab provides researchers with free of charge access to a complete infrastructure of state of the art hard and software. This infrastructure includes components, which might be too expensive for an ordinary research environment, such as servers with up to 64 cores and 2 TB main memory.
The offerings address researchers particularly from but not limited to the areas of computer science and business information systems. Main areas of research include cloud computing, parallelization, and In-Memory technologies. This technical report presents results of research projects executed in 2019. Selected projects have presented their results on April 9th and November 12th 2019 at the Future SOC Lab Day events.},
  language = {en}
}

@article{WiemkerBunovaNeufeldetal.2022,
  author = {Wiemker, Veronika and Bunova, Anna and Neufeld, Maria and Gornyi, Boris and Yurasova, Elena and Konigorski, Stefan and Kalinina, Anna and Kontsevaya, Anna and Ferreira-Borges, Carina and Probst, Charlotte},
  title = {Pilot study to evaluate usability and acceptability of the `{Animated Alcohol Assessment Tool}' in {Russian} primary healthcare},
  journal = {Digital health},
  volume = {8},
  publisher = {Sage Publications},
  address = {London},
  issn = {2055-2076},
  doi = {10.1177/20552076211074491},
  pages = {11},
  year = {2022},
  abstract = {Background and aims: Accurate and user-friendly assessment tools quantifying alcohol consumption are a prerequisite to effective prevention and treatment programmes, including Screening and Brief Intervention. Digital tools offer new potential in this field. We developed the `Animated Alcohol Assessment Tool' (AAA-Tool), a mobile app providing an interactive version of the World Health Organization's Alcohol Use Disorders Identification Test (AUDIT) that facilitates the description of individual alcohol consumption via culturally informed animation features. This pilot study evaluated the Russia-specific version of the Animated Alcohol Assessment Tool with regard to (1) its usability and acceptability in a primary healthcare setting, (2) the plausibility of its alcohol consumption assessment results and (3) the adequacy of its Russia-specific vessel and beverage selection.
Methods: Convenience samples of 55 patients (47\% female) and 15 healthcare practitioners (80\% female) in 2 Russian primary healthcare facilities self-administered the Animated Alcohol Assessment Tool and rated their experience on the Mobile Application Rating Scale - User Version. Usage data was automatically collected during app usage, and additional feedback on regional content was elicited in semi-structured interviews. Results: On average, patients completed the Animated Alcohol Assessment Tool in 6:38 min (SD = 2.49, range = 3.00-17.16). User satisfaction was good, with all subscale Mobile Application Rating Scale - User Version scores averaging >3 out of 5 points. A majority of patients (53\%) and practitioners (93\%) would recommend the tool to `many people' or `everyone'. Assessed alcohol consumption was plausible, with a low number (14\%) of logically impossible entries. Most patients reported the Animated Alcohol Assessment Tool to reflect all vessels (78\%) and all beverages (71\%) they typically used. Conclusion: High acceptability ratings by patients and healthcare practitioners, acceptable completion time, plausible alcohol usage assessment results and perceived adequacy of region-specific content underline the Animated Alcohol Assessment Tool's potential to provide a novel approach to alcohol assessment in primary healthcare. After its validation, the Animated Alcohol Assessment Tool might contribute to reducing alcohol-related harm by facilitating Screening and Brief Intervention implementation in Russia and beyond.},
  language = {en}
}

@article{OmranianAngeleskaNikoloski2021,
  author = {Omranian, Sara and Angeleska, Angela and Nikoloski, Zoran},
  title = {{PC2P}},
  journal = {Bioinformatics},
  volume = {37},
  number = {1},
  publisher = {Oxford Univ. Press},
  address = {Oxford},
  issn = {1367-4811},
  doi = {10.1093/bioinformatics/btaa1089},
  pages = {73--81},
  year = {2021},
  abstract = {Motivation: Prediction of protein complexes from protein-protein interaction (PPI) networks is an important problem in systems biology, as they control different cellular functions. The existing solutions employ algorithms for network community detection that identify dense subgraphs in PPI networks. However, gold standards in yeast and human indicate that protein complexes can also induce sparse subgraphs, introducing further challenges in protein complex prediction. Results: To address this issue, we formalize protein complexes as biclique spanned subgraphs, which include both sparse and dense subgraphs. We then cast the problem of protein complex prediction as a network partitioning into biclique spanned subgraphs with removal of minimum number of edges, called coherent partition. Since finding a coherent partition is a computationally intractable problem, we devise a parameter-free greedy approximation algorithm, termed Protein Complexes from Coherent Partition (PC2P), based on key properties of biclique spanned subgraphs. Through comparison with nine contenders, we demonstrate that PC2P: (i) successfully identifies modular structure in networks, as a prerequisite for protein complex prediction, (ii) outperforms the existing solutions with respect to a composite score of five performance measures on 75\% and 100\% of the analyzed PPI networks and gold standards in yeast and human, respectively, and (iii,iv) does not compromise GO semantic similarity and enrichment score of the predicted protein complexes.
Therefore, our study demonstrates that clustering of networks in terms of biclique spanned subgraphs is a promising framework for detection of complexes in PPI networks.},
  language = {en}
}

% NOTE(review): the export listed `publisher` twice for this book; the first value
% (the university press) is kept as `publisher`, the second (the university) is kept
% as `institution` -- confirm the intended field.
@book{ZhangPlauthEberhardtetal.2020,
  author = {Zhang, Shuhao and Plauth, Max and Eberhardt, Felix and Polze, Andreas
    and Lehmann, Jens and Sejdiu, Gezim and Jabeen, Hajira and Servadei, Lorenzo
    and M{\"o}stl, Christian and B{\"a}r, Florian and Netzeband, Andr{\'e} and Schmidt, Rainer
    and Knigge, Marlene and Hecht, Sonja and Prifti, Loina and Krcmar, Helmut
    and Sapegin, Andrey and Jaeger, David and Cheng, Feng and Meinel, Christoph
    and Friedrich, Tobias and Rothenberger, Ralf and Sutton, Andrew M. and Sidorova, Julia A.
    and Lundberg, Lars and Rosander, Oliver and Sk{\"o}ld, Lars and Di Varano, Igor
    and van der Walt, Est{\'e}e and Eloff, Jan H. P. and Fabian, Benjamin and Baumann, Annika
    and Ermakova, Tatiana and Kelkel, Stefan and Choudhary, Yash and Cooray, Thilini
    and Rodr{\'i}guez, Jorge and Medina-P{\'e}rez, Miguel Angel and Trejo, Luis A.
    and Barrera-Animas, Ari Yair and Monroy-Borja, Ra{\'u}l and L{\'o}pez-Cuevas, Armando
    and Ram{\'i}rez-M{\'a}rquez, Jos{\'e} Emmanuel and Grohmann, Maria
    and Niederleithinger, Ernst and Podapati, Sasidhar and Schmidt, Christopher
    and Huegle, Johannes and de Oliveira, Roberto C. L.
    and Soares, F{\'a}bio Mendes and van Hoorn, Andr{\'e} and Neumer, Tamas
    and Willnecker, Felix and Wilhelm, Mathias and Kuster, Bernhard},
  title = {HPI Future SOC Lab - Proceedings 2017},
  number = {130},
  editor = {Meinel, Christoph and Polze, Andreas and Beins, Karsten and Strotmann, Rolf and Seibold, Ulrich and R{\"o}dszus, Kurt and M{\"u}ller, J{\"u}rgen},
  publisher = {Universit{\"a}tsverlag Potsdam},
  address = {Potsdam},
  institution = {Universit{\"a}t Potsdam},
  isbn = {978-3-86956-475-3},
  issn = {1613-5652},
  doi = {10.25932/publishup-43310},
  url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-433100},
  pages = {ix, 235},
  year = {2020},
  abstract = {The ``HPI Future SOC Lab'' is a cooperation of the Hasso Plattner Institute (HPI) and industry partners. Its mission is to enable and promote exchange and interaction between the research community and the industry partners. The HPI Future SOC Lab provides researchers with free of charge access to a complete infrastructure of state of the art hard and software. This infrastructure includes components, which might be too expensive for an ordinary research environment, such as servers with up to 64 cores and 2 TB main memory. The offerings address researchers particularly from but not limited to the areas of computer science and business information systems. Main areas of research include cloud computing, parallelization, and In-Memory technologies. This technical report presents results of research projects executed in 2017. Selected projects have presented their results on April 25th and November 15th 2017 at the Future SOC Lab Day events.},
  language = {en}
}

@article{UlrichLutfiRutzenetal.2022,
  author = {Ulrich, Jens-Uwe and Lutfi, Ahmad and Rutzen, Kilian and Renard, Bernhard Y.},
  title = {{ReadBouncer}},
  journal = {Bioinformatics},
  volume = {38},
  number = {SUPPL 1},
  publisher = {Oxford Univ.
Press},
  address = {Oxford},
  issn = {1367-4803},
  doi = {10.1093/bioinformatics/btac223},
  pages = {153--160},
  year = {2022},
  abstract = {Motivation: Nanopore sequencers allow targeted sequencing of interesting nucleotide sequences by rejecting other sequences from individual pores. This feature facilitates the enrichment of low-abundant sequences by depleting overrepresented ones in-silico. Existing tools for adaptive sampling either apply signal alignment, which cannot handle human-sized reference sequences, or apply read mapping in sequence space relying on fast graphical processing units (GPU) base callers for real-time read rejection. Using nanopore long-read mapping tools is also not optimal when mapping shorter reads as usually analyzed in adaptive sampling applications. Results: Here, we present a new approach for nanopore adaptive sampling that combines fast CPU and GPU base calling with read classification based on Interleaved Bloom Filters. ReadBouncer improves the potential enrichment of low abundance sequences by its high read classification sensitivity and specificity, outperforming existing tools in the field. It robustly removes even reads belonging to large reference sequences while running on commodity hardware without GPUs, making adaptive sampling accessible for in-field researchers. Readbouncer also provides a user-friendly interface and installer files for end-users without a bioinformatics background.},
  language = {en}
}

@article{WittigMirandaHoelzeretal.2022,
  author = {Wittig, Alice and Miranda, Fabio Malcher and H{\"o}lzer, Martin and Altenburg, Tom
    and Bartoszewicz, Jakub Maciej and Beyvers, Sebastian and Dieckmann, Marius Alfred
    and Genske, Ulrich and Giese, Sven Hans-Joachim and Nowicka, Melania and Richard, Hugues
    and Schiebenhoefer, Henning and Schmachtenberg, Anna-Juliane and Sieben, Paul
    and Tang, Ming and Tembrockhaus, Julius and Renard, Bernhard Y.
and Fuchs, Stephan},
  title = {{CovRadar}},
  journal = {Bioinformatics},
  volume = {38},
  number = {17},
  publisher = {Oxford Univ. Press},
  address = {Oxford},
  issn = {1367-4803},
  doi = {10.1093/bioinformatics/btac411},
  pages = {4223--4225},
  year = {2022},
  abstract = {The ongoing pandemic caused by SARS-CoV-2 emphasizes the importance of genomic surveillance to understand the evolution of the virus, to monitor the viral population, and plan epidemiological responses. Detailed analysis, easy visualization and intuitive filtering of the latest viral sequences are powerful for this purpose. We present CovRadar, a tool for genomic surveillance of the SARS-CoV-2 Spike protein. CovRadar consists of an analytical pipeline and a web application that enable the analysis and visualization of hundreds of thousand sequences. First, CovRadar extracts the regions of interest using local alignment, then builds a multiple sequence alignment, infers variants and consensus and finally presents the results in an interactive app, making accessing and reporting simple, flexible and fast.},
  language = {en}
}

@article{TrautmannZhouBrahmsetal.2021,
  author = {Trautmann, Justin and Zhou, Lin and Brahms, Clemens Markus and Tunca, Can and Ersoy, Cem and Granacher, Urs and Arnrich, Bert},
  title = {{TRIPOD}},
  journal = {Data : open access `Data in science' journal},
  volume = {6},
  number = {9},
  publisher = {MDPI},
  address = {Basel},
  issn = {2306-5729},
  doi = {10.3390/data6090095},
  pages = {19},
  year = {2021},
  abstract = {Inertial measurement units (IMUs) enable easy to operate and low-cost data recording for gait analysis. When combined with treadmill walking, a large number of steps can be collected in a controlled environment without the need of a dedicated gait analysis laboratory.
In order to evaluate existing and novel IMU-based gait analysis algorithms for treadmill walking, a reference dataset that includes IMU data as well as reliable ground truth measurements for multiple participants and walking speeds is needed. This article provides a reference dataset consisting of 15 healthy young adults who walked on a treadmill at three different speeds. Data were acquired using seven IMUs placed on the lower body, two different reference systems (Zebris FDMT-HQ and OptoGait), and two RGB cameras. Additionally, in order to validate an existing IMU-based gait analysis algorithm using the dataset, an adaptable modular data analysis pipeline was built. Our results show agreement between the pressure-sensitive Zebris and the photoelectric OptoGait system (r = 0.99), demonstrating the quality of our reference data. As a use case, the performance of an algorithm originally designed for overground walking was tested on treadmill data using the data pipeline. The accuracy of stride length and stride time estimations was comparable to that reported in other studies with overground data, indicating that the algorithm is equally applicable to treadmill data.
The Python source code of the data pipeline is publicly available, and the dataset will be provided by the authors upon request, enabling future evaluations of IMU gait analysis algorithms without the need of recording new data.},
  language = {en}
}

% NOTE(review): the export listed `publisher` twice for this book; the first value
% (the university press) is kept as `publisher`, the second (the university) is kept
% as `institution` -- confirm the intended field.
@book{MeinelDoellnerWeskeetal.2021,
  author = {Meinel, Christoph and D{\"o}llner, J{\"u}rgen Roland Friedrich and Weske, Mathias
    and Polze, Andreas and Hirschfeld, Robert and Naumann, Felix and Giese, Holger
    and Baudisch, Patrick and Friedrich, Tobias and B{\"o}ttinger, Erwin and Lippert, Christoph
    and D{\"o}rr, Christian and Lehmann, Anja and Renard, Bernhard and Rabl, Tilmann
    and Uebernickel, Falk and Arnrich, Bert and H{\"o}lzle, Katharina},
  title = {Proceedings of the HPI Research School on Service-oriented Systems Engineering 2020 Fall Retreat},
  number = {138},
  publisher = {Universit{\"a}tsverlag Potsdam},
  address = {Potsdam},
  institution = {Universit{\"a}t Potsdam},
  isbn = {978-3-86956-513-2},
  issn = {1613-5652},
  doi = {10.25932/publishup-50413},
  url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-504132},
  pages = {vi, 144},
  year = {2021},
  abstract = {Design and Implementation of service-oriented architectures imposes a huge number of research questions from the fields of software engineering, system analysis and modeling, adaptability, and application integration. Component orientation and web services are two approaches for design and realization of complex web-based system. Both approaches allow for dynamic application adaptation as well as integration of enterprise application. Service-Oriented Systems Engineering represents a symbiosis of best practices in object-orientation, component-based development, distributed computing, and business process management. It provides integration of business and IT concerns. The annual Ph.D. Retreat of the Research School provides each member the opportunity to present his/her current state of their research and to give an outline of a prospective Ph.D. thesis.
Due to the interdisciplinary structure of the research school, this technical report covers a wide range of topics. These include but are not limited to: Human Computer Interaction and Computer Vision as Service; Service-oriented Geovisualization Systems; Algorithm Engineering for Service-oriented Systems; Modeling and Verification of Self-adaptive Service-oriented Systems; Tools and Methods for Software Engineering in Service-oriented Systems; Security Engineering of Service-based IT Systems; Service-oriented Information Systems; Evolutionary Transition of Enterprise Applications to Service Orientation; Operating System Abstractions for Service-oriented Computing; and Services Specification, Composition, and Enactment.},
  language = {en}
}

@article{DeFreitasJohnsonGoldenetal.2021,
  author = {De Freitas, Jessica K. and Johnson, Kipp W. and Golden, Eddye and Nadkarni, Girish N. and Dudley, Joel T. and B{\"o}ttinger, Erwin and Glicksberg, Benjamin S. and Miotto, Riccardo},
  title = {Phe2vec},
  journal = {Patterns},
  volume = {2},
  number = {9},
  publisher = {Elsevier},
  address = {Amsterdam},
  issn = {2666-3899},
  doi = {10.1016/j.patter.2021.100337},
  pages = {9},
  year = {2021},
  abstract = {Robust phenotyping of patients from electronic health records (EHRs) at scale is a challenge in clinical informatics. Here, we introduce Phe2vec, an automated framework for disease phenotyping from EHRs based on unsupervised learning and assess its effectiveness against standard rule-based algorithms from Phenotype KnowledgeBase (PheKB). Phe2vec is based on pre-computing embeddings of medical concepts and patients' clinical history. Disease phenotypes are then derived from a seed concept and its neighbors in the embedding space. Patients are linked to a disease if their embedded representation is close to the disease phenotype. Comparing Phe2vec and PheKB cohorts head-to-head using chart review, Phe2vec performed on par or better in nine out of ten diseases.
Differently from other approaches, it can scale to any condition and was validated against widely adopted expert-based standards. Phe2vec aims to optimize clinical informatics research by augmenting current frameworks to characterize patients by condition and derive reliable disease cohorts.},
  language = {en}
}

@misc{KonigorskiWernickeSlosareketal.2023,
  author = {Konigorski, Stefan and Wernicke, Sarah and Slosarek, Tamara and Zenner, Alexander Maximilian
    and Strelow, Nils and Ruether, Darius Ferenc and Henschel, Florian and Manaswini, Manisha
    and Pottb{\"a}cker, Fabian and Edelman, Jonathan Antonio and Owoyele, Babajide
    and Danieletto, Matteo and Golden, Eddye and Zweig, Micol and Nadkarni, Girish N.
    and B{\"o}ttinger, Erwin},
  title = {{StudyU}: A Platform for Designing and Conducting Innovative Digital {N}-of-1 Trials},
  series = {Zweitver{\"o}ffentlichungen der Universit{\"a}t Potsdam : Reihe der Digital Engineering Fakult{\"a}t},
  journal = {Zweitver{\"o}ffentlichungen der Universit{\"a}t Potsdam : Reihe der Digital Engineering Fakult{\"a}t},
  number = {12},
  doi = {10.25932/publishup-58037},
  url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-580370},
  pages = {12},
  year = {2023},
  abstract = {N-of-1 trials are the gold standard study design to evaluate individual treatment effects and derive personalized treatment strategies. Digital tools have the potential to initiate a new era of N-of-1 trials in terms of scale and scope, but fully functional platforms are not yet available. Here, we present the open source StudyU platform, which includes the StudyU Designer and StudyU app. With the StudyU Designer, scientists are given a collaborative web application to digitally specify, publish, and conduct N-of-1 trials. The StudyU app is a smartphone app with innovative user-centric elements for participants to partake in trials published through the StudyU Designer to assess the effects of different interventions on their health.
Thereby, the StudyU platform allows clinicians and researchers worldwide to easily design and conduct digital N-of-1 trials in a safe manner. We envision that StudyU can change the landscape of personalized treatments both for patients and healthy individuals, democratize and personalize evidence generation for self-optimization and medicine, and can be integrated in clinical practice.}, language = {en} } @article{FreitasdaCruzPfahringerMartensenetal.2021, author = {Freitas da Cruz, Harry and Pfahringer, Boris and Martensen, Tom and Schneider, Frederic and Meyer, Alexander and B{\"o}ttinger, Erwin and Schapranow, Matthieu-Patrick}, title = {Using interpretability approaches to update "black-box" clinical prediction models}, series = {Artificial intelligence in medicine : AIM}, volume = {111}, journal = {Artificial intelligence in medicine : AIM}, publisher = {Elsevier}, address = {Amsterdam}, issn = {0933-3657}, doi = {10.1016/j.artmed.2020.101982}, pages = {13}, year = {2021}, abstract = {Despite advances in machine learning-based clinical prediction models, only few of such models are actually deployed in clinical contexts. Among other reasons, this is due to a lack of validation studies. In this paper, we present and discuss the validation results of a machine learning model for the prediction of acute kidney injury in cardiac surgery patients initially developed on the MIMIC-III dataset when applied to an external cohort of an American research hospital. To help account for the performance differences observed, we utilized interpretability methods based on feature importance, which allowed experts to scrutinize model behavior both at the global and local level, making it possible to gain further insights into why it did not behave as expected on the validation cohort. The knowledge gleaned upon derivation can be potentially useful to assist model update during validation for more generalizable and simpler models. 
We argue that interpretability methods should be considered by practitioners as a further tool to help explain performance differences and inform model update in validation studies.}, language = {en} } @book{AdrianoBleifussChengetal.2019, author = {Adriano, Christian and Bleifuß, Tobias and Cheng, Lung-Pan and Diba, Kiarash and Fricke, Andreas and Grapentin, Andreas and Jiang, Lan and Kovacs, Robert and Krejca, Martin Stefan and Mandal, Sankalita and Marwecki, Sebastian and Matthies, Christoph and Mattis, Toni and Niephaus, Fabio and Pirl, Lukas and Quinzan, Francesco and Ramson, Stefan and Rezaei, Mina and Risch, Julian and Rothenberger, Ralf and Roumen, Thijs and Stojanovic, Vladeta and Wolf, Johannes}, title = {Technical report}, number = {129}, editor = {Meinel, Christoph and Plattner, Hasso and D{\"o}llner, J{\"u}rgen Roland Friedrich and Weske, Mathias and Polze, Andreas and Hirschfeld, Robert and Naumann, Felix and Giese, Holger and Baudisch, Patrick and Friedrich, Tobias and B{\"o}ttinger, Erwin and Lippert, Christoph}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-465-4}, issn = {1613-5652}, doi = {10.25932/publishup-42753}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-427535}, publisher = {Universit{\"a}t Potsdam}, pages = {vi, 267}, year = {2019}, abstract = {Design and Implementation of service-oriented architectures imposes a huge number of research questions from the fields of software engineering, system analysis and modeling, adaptability, and application integration. Component orientation and web services are two approaches for design and realization of complex web-based system. Both approaches allow for dynamic application adaptation as well as integration of enterprise application. Commonly used technologies, such as J2EE and .NET, form de facto standards for the realization of complex distributed systems. Evolution of component systems has lead to web services and service-based architectures. 
This has been manifested in a multitude of industry standards and initiatives such as XML, WSDL UDDI, SOAP, etc. All these achievements lead to a new and promising paradigm in IT systems engineering which proposes to design complex software solutions as collaboration of contractually defined software services. Service-Oriented Systems Engineering represents a symbiosis of best practices in object-orientation, component-based development, distributed computing, and business process management. It provides integration of business and IT concerns. The annual Ph.D. Retreat of the Research School provides each member the opportunity to present his/her current state of their research and to give an outline of a prospective Ph.D. thesis. Due to the interdisciplinary structure of the research school, this technical report covers a wide range of topics. These include but are not limited to: Human Computer Interaction and Computer Vision as Service; Service-oriented Geovisualization Systems; Algorithm Engineering for Service-oriented Systems; Modeling and Verification of Self-adaptive Service-oriented Systems; Tools and Methods for Software Engineering in Service-oriented Systems; Security Engineering of Service-based IT Systems; Service-oriented Information Systems; Evolutionary Transition of Enterprise Applications to Service Orientation; Operating System Abstractions for Service-oriented Computing; and Services Specification, Composition, and Enactment.}, language = {en} } @article{BorchertMockTomczaketal.2021, author = {Borchert, Florian and Mock, Andreas and Tomczak, Aurelie and H{\"u}gel, Jonas and Alkarkoukly, Samer and Knurr, Alexander and Volckmar, Anna-Lena and Stenzinger, Albrecht and Schirmacher, Peter and Debus, J{\"u}rgen and J{\"a}ger, Dirk and Longerich, Thomas and Fr{\"o}hling, Stefan and Eils, Roland and Bougatf, Nina and Sax, Ulrich and Schapranow, Matthieu-Patrick}, title = {Knowledge bases and software support for variant interpretation in precision 
oncology}, series = {Briefings in bioinformatics}, volume = {22}, journal = {Briefings in bioinformatics}, number = {6}, publisher = {Oxford Univ. Press}, address = {Oxford}, issn = {1467-5463}, doi = {10.1093/bib/bbab134}, pages = {17}, year = {2021}, abstract = {Precision oncology is a rapidly evolving interdisciplinary medical specialty. Comprehensive cancer panels are becoming increasingly available at pathology departments worldwide, creating the urgent need for scalable cancer variant annotation and molecularly informed treatment recommendations. A wealth of mainly academia-driven knowledge bases calls for software tools supporting the multi-step diagnostic process. We derive a comprehensive list of knowledge bases relevant for variant interpretation by a review of existing literature followed by a survey among medical experts from university hospitals in Germany. In addition, we review cancer variant interpretation tools, which integrate multiple knowledge bases. We categorize the knowledge bases along the diagnostic process in precision oncology and analyze programmatic access options as well as the integration of knowledge bases into software tools. The most commonly used knowledge bases provide good programmatic access options and have been integrated into a range of software tools. For the wider set of knowledge bases, access options vary across different parts of the diagnostic process. Programmatic access is limited for information regarding clinical classifications of variants and for therapy recommendations. The main issue for databases used for biological classification of pathogenic variants and pathway context information is the lack of standardized interfaces. There is no single cancer variant interpretation tool that integrates all identified knowledge bases. 
Specialized tools are available and need to be further developed for different steps in the diagnostic process.}, language = {en} } @article{XinYingTiberiusAlnooretal.2024, author = {XinYing, Chew and Tiberius, Victor and Alnoor, Alhamzah and Camilleri, Mark and Khaw, Khai Wah}, title = {The dark side of metaverse: a multi-perspective of deviant behaviors from PLS-SEM and fsQCA findings}, series = {International journal of human-computer interaction}, journal = {International journal of human-computer interaction}, publisher = {Taylor \& Francis}, address = {London}, issn = {1044-7318}, doi = {10.1080/10447318.2024.2331875}, pages = {21}, year = {2024}, abstract = {The metaverse has created a huge buzz of interest because such a phenomenon is emerging. The behavioral aspect of the metaverse includes user engagement and deviant behaviors in the metaverse. Such technology has brought various dangers to individuals and society. There are growing cases reported of sexual abuse, racism, harassment, hate speech, and bullying because of online disinhibition make us feel more relaxed. This study responded to the literature call by investigating the effect of technical and social features through mediating roles of security and privacy on deviant behaviors in the metaverse. The data collected from virtual network users reached 1121 respondents. Partial Least Squares based structural equation modeling (PLS-SEM) and fuzzy set Qualitative Comparative Analysis (fsQCA) were used. PLS-SEM results revealed that social features such as user-to-user interaction, homophily, social ties, and social identity, and technical design such as immersive experience and invisibility significantly affect users' deviant behavior in the metaverse. The fsQCA results provided insights into the multiple causal solutions and configurations. 
This study is exceptional because it provided decisive results by understanding the deviant behavior of users based on the symmetrical and asymmetrical approach to virtual networks.}, language = {en} } @article{KuehlerDrathschmidtGrossmann2024, author = {K{\"u}hler, Jakob and Drathschmidt, Nicolas and Großmann, Daniela}, title = {'Modern talking'}, series = {Information polity}, volume = {29}, journal = {Information polity}, number = {2}, publisher = {IOS Press}, address = {Amsterdam}, issn = {1570-1255}, doi = {10.3233/IP-230059}, pages = {199 -- 216}, year = {2024}, abstract = {Despite growing interest, we lack a clear understanding of how the arguably ambiguous phenomenon of agile is perceived in government practice. This study aims to alleviate this puzzle by investigating how managers and employees in German public sector organisations make sense of agile as a spreading management fashion in the form of narratives. This is important because narratives function as innovation carriers that ultimately influence the manifestations of the concept in organisations. Based on a multi-case study of 31 interviews and 24 responses to a qualitative online survey conducted in 2021 and 2022, we provide insights into what public sector managers, employees and consultants understand (and, more importantly, do not understand) as agile and how they weave it into their existing reality of bureaucratic organisations. We uncover three meta-narratives of agile government, which we label 'renew', 'complement' and 'integrate'. In particular, the meta-narratives differ in their positioning of how agile interacts with the characteristics of bureaucratic organisations. Importantly, we also show that agile as a management fad serves as a projection surface for what actors want from a modern and digital organisation. Thus, the vocabulary of agile government within the narratives is inherently linked to other diffusing phenomena such as new work or digitalisation.}, language = {en} }