@article{RichlyBrauerSchlosser2020, author = {Richly, Keven and Brauer, Janos and Schlosser, Rainer}, title = {Predicting location probabilities of drivers to improve dispatch decisions of transportation network companies based on trajectory data}, series = {Proceedings of the 9th International Conference on Operations Research and Enterprise Systems - ICORES}, journal = {Proceedings of the 9th International Conference on Operations Research and Enterprise Systems - ICORES}, publisher = {Springer}, address = {Berlin}, pages = {12}, year = {2020}, abstract = {The demand for peer-to-peer ridesharing services has increased rapidly over the last years. Dispatching orders cost-efficiently and communicating accurate pick-up times is challenging, as the current location of each available driver is not exactly known: observed locations can be outdated by several seconds. The developed trajectory visualization tool enables transportation network companies to analyze dispatch processes and determine the causes of unexpected delays. As dispatching algorithms depend on the accuracy of arrival time predictions, we account for factors like noise, sample rate, technical and economic limitations, as well as the duration of the entire process, as they all have an impact on the accuracy of spatio-temporal data. To improve dispatching strategies, we propose a prediction approach that provides a probability distribution for a driver's future locations based on patterns observed in past trajectories. We demonstrate the capabilities of our prediction results (i) to avoid critical delays, (ii) to estimate waiting times with higher confidence, and (iii) to enable risk considerations in dispatching strategies.}, language = {en} } @article{SoechtingTrapp2020, author = {S{\"o}chting, Maximilian and Trapp, Matthias}, title = {Controlling image-stylization techniques using eye tracking}, series = {Science and Technology Publications}, journal = {Science and Technology Publications}, publisher = {Springer}, address = {Berlin}, issn = {2184-4321}, pages = {10}, year = {2020}, abstract = {With the spread of smartphones capable of taking high-resolution photos and the development of high-speed mobile data infrastructure, digital visual media is becoming one of the most important forms of modern communication. With this development, however, also comes a devaluation of images as a media form: the focus shifts to the frequency at which visual content is generated instead of the quality of the content. In this work, an interactive system using image-abstraction techniques and an eye tracking sensor is presented, which allows users to experience diverting and dynamic artworks that react to their eye movement. The underlying modular architecture enables a variety of different interaction techniques that share common design principles, making the interface as intuitive as possible. The resulting experience is a game-like interaction in which users aim for a reward, the artwork, while being held under constraints, e.g., not blinking. The conscious eye movements that are required by some interaction techniques hint at an interesting possible future extension of this work into the field of relaxation exercises and concentration training.}, language = {en} } @article{VanHoutTachmazidouBackmanetal.2020, author = {Van Hout, Cristopher V. and Tachmazidou, Ioanna and Backman, Joshua D. and Hoffman, Joshua D. and Liu, Daren and Pandey, Ashutosh K.
and Gonzaga-Jauregui, Claudia and Khalid, Shareef and Ye, Bin and Banerjee, Nilanjana and Li, Alexander H. and O'Dushlaine, Colm and Marcketta, Anthony and Staples, Jeffrey and Schurmann, Claudia and Hawes, Alicia and Maxwell, Evan and Barnard, Leland and Lopez, Alexander and Penn, John and Habegger, Lukas and Blumenfeld, Andrew L. and Bai, Xiaodong and O'Keeffe, Sean and Yadav, Ashish and Praveen, Kavita and Jones, Marcus and Salerno, William J. and Chung, Wendy K. and Surakka, Ida and Willer, Cristen J. and Hveem, Kristian and Leader, Joseph B. and Carey, David J. and Ledbetter, David H. and Cardon, Lon and Yancopoulos, George D. and Economides, Aris and Coppola, Giovanni and Shuldiner, Alan R. and Balasubramanian, Suganthi and Cantor, Michael and Nelson, Matthew R. and Whittaker, John and Reid, Jeffrey G. and Marchini, Jonathan and Overton, John D. and Scott, Robert A. and Abecasis, Goncalo R. and Yerges-Armstrong, Laura M. and Baras, Aris}, title = {Exome sequencing and characterization of 49,960 individuals in the UK Biobank}, series = {Nature : the international weekly journal of science}, volume = {586}, journal = {Nature : the international weekly journal of science}, number = {7831}, publisher = {Macmillan Publishers Limited}, address = {London}, organization = {Regeneron Genetics Ctr}, issn = {0028-0836}, doi = {10.1038/s41586-020-2853-0}, pages = {749 -- 756}, year = {2020}, abstract = {The UK Biobank is a prospective study of 502,543 individuals, combining extensive phenotypic and genotypic data with streamlined access for researchers around the world (1). Here we describe the release of exome-sequence data for the first 49,960 study participants, revealing approximately 4 million coding variants (of which around 98.6\% have a frequency of less than 1\%). The data include 198,269 autosomal predicted loss-of-function (LOF) variants, a more than 14-fold increase compared to the imputed sequence. Nearly all genes (more than 97\%) had at least one carrier with a LOF variant, and most genes (more than 69\%) had at least ten carriers with a LOF variant. We illustrate the power of characterizing LOF variants in this population through association analyses across 1,730 phenotypes. In addition to replicating established associations, we found novel LOF variants with large effects on disease traits, including PIEZO1 on varicose veins, COL6A1 on corneal resistance, MEPE on bone density, and IQGAP2 and GMPR on blood cell traits. We further demonstrate the value of exome sequencing by surveying the prevalence of pathogenic variants of clinical importance, and show that 2\% of this population has a medically actionable variant. Furthermore, we characterize the penetrance of cancer in carriers of pathogenic BRCA1 and BRCA2 variants. Exome sequences from the first 49,960 participants highlight the promise of genome sequencing in large population-based studies and are now accessible to the scientific community.
}, language = {en} } @article{ChristopherAshwoodBittremieuxDeutschetal.2020, author = {Ashwood, Christopher and Bittremieux, Wout and Deutsch, Eric W. and Doncheva, Nadezhda T. and Dorfer, Viktoria and Gabriels, Ralf and Gorshkov, Vladimir and Gupta, Surya and Jones, Andrew R. and K{\"a}ll, Lukas and Kopczynski, Dominik and Lane, Lydie and Lautenbacher, Ludwig and Legeay, Marc and Locard-Paulet, Marie and Mesuere, Bart and Sachsenberg, Timo and Salz, Renee and Samaras, Patroklos and Schiebenhoefer, Henning and Schmidt, Tobias and Schw{\"a}mmle, Veit and Soggiu, Alessio and Uszkoreit, Julian and Van Den Bossche, Tim and Van Puyvelde, Bart and Van Strien, Joeri and Verschaffelt, Pieter and Webel, Henry and Willems, Sander and Perez-Riverol, Yasset and Netz, Eugen and Pfeuffer, Julianus}, title = {Proceedings of the EuBIC-MS 2020 Developers' Meeting}, series = {EuPA Open Proteomics}, volume = {24}, journal = {EuPA Open Proteomics}, publisher = {Elsevier}, address = {Amsterdam}, issn = {2212-9685}, doi = {10.1016/j.euprot.2020.11.001}, pages = {1 -- 6}, year = {2020}, abstract = {The 2020 European Bioinformatics Community for Mass Spectrometry (EuBIC-MS) Developers' Meeting was held from January 13th to January 17th, 2020, in Nyborg, Denmark. Among the participants were scientists as well as developers working in the field of computational mass spectrometry (MS) and proteomics. The 4-day program was split between introductory keynote lectures and parallel hackathon sessions. During the latter, the participants developed bioinformatics tools and resources addressing outstanding needs in the community. The hackathons allowed less experienced participants to learn from more advanced computational MS experts, and to actively contribute to highly relevant research projects. We successfully produced several new tools that will be useful to the proteomics community by improving data analysis as well as facilitating future research. All keynote recordings are available at https://doi.org/10.5281/zenodo.3890181.}, language = {en} } @article{ScheibelTrappLimbergeretal.2020, author = {Scheibel, Willy and Trapp, Matthias and Limberger, Daniel and D{\"o}llner, J{\"u}rgen Roland Friedrich}, title = {A taxonomy of treemap visualization techniques}, series = {Science and Technology Publications}, journal = {Science and Technology Publications}, publisher = {Springer}, address = {Berlin}, pages = {8}, year = {2020}, abstract = {A treemap is a visualization that has been specifically designed to facilitate the exploration of tree-structured data and, more generally, hierarchically structured data. The family of visualization techniques that use a visual metaphor for parent-child relationships based "on the property of containment" (Johnson, 1993) is commonly referred to as treemaps. However, as the number of variations of treemaps grows, it becomes increasingly important to distinguish clearly between techniques and their specific characteristics. This paper proposes to discern between Space-filling Treemap TS, Containment Treemap TC, Implicit Edge Representation Tree TIE, and Mapped Tree TMT for the classification of hierarchy visualization techniques and highlights their respective properties.
This taxonomy is created as a hyponymy, i.e., its classes have an is-a relationship to one another: TS is-a TC is-a TIE is-a TMT. With this proposal, we intend to stimulate a discussion on a more unambiguous classification of treemaps and, furthermore, to broaden what is understood by the concept of treemap itself.}, language = {en} } @phdthesis{Dyck2020, author = {Dyck, Johannes}, title = {Verification of graph transformation systems with k-inductive invariants}, doi = {10.25932/publishup-44274}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-442742}, school = {Universit{\"a}t Potsdam}, pages = {X, 364}, year = {2020}, abstract = {With the rising complexity of today's software and hardware systems and the hypothesized increase in autonomous, intelligent, and self-* systems, developing correct systems remains an important challenge. Testing, although an important part of the development and maintenance process, cannot usually establish the definite correctness of a software or hardware system - especially when systems have arbitrarily large or infinite state spaces or an infinite number of initial states. This is where formal verification comes in: given a representation of the system in question in a formal framework, verification approaches and tools can be used to establish the system's adherence to its similarly formalized specification, and to complement testing. One such formal framework is the field of graphs and graph transformation systems. Both are powerful formalisms with well-established foundations and ongoing research that can be used to describe complex hardware or software systems with varying degrees of abstraction. Since their inception in the 1970s, graph transformation systems have continuously evolved; related research spans extensions of expressive power, graph algorithms and their implementation, application scenarios, and verification approaches, to name just a few topics. This thesis focuses on a verification approach for graph transformation systems called k-inductive invariant checking, which is an extension of previous work on 1-inductive invariant checking. Instead of exhaustively computing a system's state space, which is a common approach in model checking, 1-inductive invariant checking symbolically analyzes graph transformation rules - i.e., system behavior - in order to draw conclusions with respect to the validity of graph constraints in the system's state space. The approach is based on an inductive argument: if a system's initial state satisfies a graph constraint and if all rules preserve that constraint's validity, we can conclude the constraint's validity in the system's entire state space - without having to compute it. However, inductive invariant checking also comes with a specific drawback: the locality of graph transformation rules leads to a lack of context information during the symbolic analysis of potential rule applications. This thesis argues that this lack of context can be partly addressed by using k-induction instead of 1-induction. A k-inductive invariant is a graph constraint whose validity in a path of k-1 rule applications implies its validity after any subsequent rule application - as opposed to a 1-inductive invariant, where only one rule application is taken into account. Considering a path of transformations then accumulates more context of the graph rules' applications. As such, this thesis extends existing research and implementation on 1-inductive invariant checking for graph transformation systems to k-induction.
In addition, it proposes a technique to perform the base case of the inductive argument in a symbolic fashion, which allows verification of systems with an infinite set of initial states. Both k-inductive invariant checking and its base case are described in formal terms. Based on that, this thesis formulates theorems and constructions to apply this general verification approach for typed graph transformation systems and nested graph constraints - and to formally prove the approach's correctness. Since unrestricted graph constraints may lead to non-termination or impracticably high execution times given a hypothetical implementation, this thesis also presents a restricted verification approach, which limits the form of graph transformation systems and graph constraints. It is formalized, proven correct, and its procedures terminate by construction. This restricted approach has been implemented in an automated tool and has been evaluated with respect to its applicability to test cases, its performance, and its degree of completeness.}, language = {en} } @phdthesis{Harmouch2020, author = {Harmouch, Hazar}, title = {Single-column data profiling}, doi = {10.25932/publishup-47455}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-474554}, school = {Universit{\"a}t Potsdam}, pages = {x, 115}, year = {2020}, abstract = {The research area of data profiling consists of a large set of methods and processes to examine a given dataset and determine metadata about it. Typically, different data profiling tasks address different kinds of metadata, comprising either various statistics about individual columns (Single-column Analysis) or relationships among them (Dependency Discovery). Among the basic statistics about a column are data type, header, the number of unique values (the column's cardinality), maximum and minimum values, the number of null values, and the value distribution. Dependencies involve, for instance, functional dependencies (FDs), inclusion dependencies (INDs), and their approximate versions. Data profiling has a wide range of conventional use cases, namely data exploration, cleansing, and integration. The produced metadata is also useful for database management and schema reverse engineering. Data profiling also has more novel use cases, such as big data analytics. The generated metadata describes the structure of the data at hand, how to import it, what it is about, and how much of it there is. Thus, data profiling can be considered an important preparatory task for many data analysis and mining scenarios to assess which data might be useful and to reveal and understand a new dataset's characteristics. In this thesis, the main focus is on the single-column analysis class of data profiling tasks. We study the impact and the extraction of three of the most important metadata about a column, namely the cardinality, the header, and the number of null values. First, we present a detailed experimental study of twelve cardinality estimation algorithms. We classify the algorithms and analyze their efficiency, scaling far beyond the original experiments and testing theoretical guarantees. Our results highlight their trade-offs and point out the possibility of creating a parallel or a distributed version of these algorithms to cope with the growing size of modern datasets.
Then, we present a fully automated, multi-phase system to discover human-understandable, representative, and consistent headers for a target table in cases where headers are missing, meaningless, or unrepresentative of the column values. Our evaluation on Wikipedia tables shows that 60\% of the automatically discovered schemata are exact and complete. Considering more schema candidates, top-5 for example, increases this percentage to 72\%. Finally, we formally and experimentally show the ghost and fake FDs phenomenon caused by FD discovery over datasets with missing values. We propose two efficient scores, probabilistic and likelihood-based, for estimating the genuineness of a discovered FD. Our extensive set of experiments on real-world and semi-synthetic datasets shows the effectiveness and efficiency of these scores.}, language = {en} } @phdthesis{Taeumel2020, author = {Taeumel, Marcel}, title = {Data-driven tool construction in exploratory programming environments}, doi = {10.25932/publishup-44428}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-444289}, school = {Universit{\"a}t Potsdam}, pages = {xiv, 299}, year = {2020}, abstract = {This work presents a new design for programming environments that promote the exploration of domain-specific software artifacts and the construction of graphical tools for such program comprehension tasks. In complex software projects, tool building is essential because domain- or task-specific tools can support decision making by representing concerns concisely with low cognitive effort. In contrast, generic tools can only support anticipated scenarios, which usually align with programming language concepts or well-known project domains. However, the creation and modification of interactive tools is expensive because the glue that connects data to graphics is hard to find, change, and test. Even if valuable data is available in a common format and even if promising visualizations could be populated, programmers have to invest many resources to make changes in the programming environment. Consequently, only ideas of predictably high value will be implemented. In the non-graphical, command-line world, the situation looks different and inspiring: programmers can easily build their own tools as shell scripts by configuring and combining filter programs to process data. We propose a new perspective on graphical tools and provide a concept to build and modify such tools with a focus on high quality, low effort, and continuous adaptability. That is, (1) we propose an object-oriented, data-driven, declarative scripting language that reduces the amount of and governs the effects of glue code for view-model specifications, and (2) we propose a scalable UI-design language that promotes short feedback loops in an interactive, graphical environment such as Morphic, known from Self or Squeak/Smalltalk systems. We implemented our concept as a tool building environment, which we call VIVIDE, on top of Squeak/Smalltalk and Morphic. We replaced existing code browsing and debugging tools to iterate within our solution more quickly. In several case studies with undergraduate and graduate students, we observed that VIVIDE can be applied to many domains such as live language development, source-code versioning, modular code browsing, and multi-language debugging. Then, we designed a controlled experiment to measure the effect on the time to build tools.
Several pilot runs showed that training is crucial and, presumably, takes days or weeks, which implies a need for further research. As a result, programmers as users can directly work with tangible representations of their software artifacts in the VIVIDE environment. Tool builders can write domain-specific scripts to populate views to approach comprehension tasks from different angles. Our novel perspective on graphical tools can inspire the creation of new trade-offs in modularity for both data providers and view designers.}, language = {en} } @article{RischKrestel2020, author = {Risch, Julian and Krestel, Ralf}, title = {Toxic comment detection in online discussions}, series = {Deep learning-based approaches for sentiment analysis}, journal = {Deep learning-based approaches for sentiment analysis}, editor = {Agarwal, Basant and Nayak, Richi and Mittal, Namita and Patnaik, Srikanta}, publisher = {Springer}, address = {Singapore}, isbn = {978-981-15-1216-2}, issn = {2524-7565}, doi = {10.1007/978-981-15-1216-2_4}, pages = {85 -- 109}, year = {2020}, abstract = {Comment sections of online news platforms are an essential space to express opinions and discuss political topics. In contrast to other online posts, news discussions are related to particular news articles, comments refer to each other, and individual conversations emerge. However, the misuse by spammers, haters, and trolls makes costly content moderation necessary. Sentiment analysis can not only support moderation but also help to understand the dynamics of online discussions. A subtask of content moderation is the identification of toxic comments. To this end, we describe the concept of toxicity and characterize its subclasses. Further, we present various deep learning approaches, including datasets and architectures, tailored to sentiment analysis in online discussions. One way to make these approaches more comprehensible and trustworthy is fine-grained instead of binary comment classification. On the downside, more classes require more training data. Therefore, we propose to augment training data by using transfer learning. We discuss real-world applications, such as semi-automated comment moderation and troll detection. Finally, we outline future challenges and current limitations in light of most recent research publications.}, language = {en} } @article{JiangNaumann2020, author = {Jiang, Lan and Naumann, Felix}, title = {Holistic primary key and foreign key detection}, series = {Journal of intelligent information systems : JIIS}, volume = {54}, journal = {Journal of intelligent information systems : JIIS}, number = {3}, publisher = {Springer}, address = {Dordrecht}, issn = {0925-9902}, doi = {10.1007/s10844-019-00562-z}, pages = {439 -- 461}, year = {2020}, abstract = {Primary keys (PKs) and foreign keys (FKs) are important elements of relational schemata in various applications, such as query optimization and data integration. However, in many cases, these constraints are unknown or not documented. Detecting them manually is time-consuming and even infeasible in large-scale datasets. We study the problem of discovering primary keys and foreign keys automatically and propose an algorithm to detect both, namely Holistic Primary Key and Foreign Key Detection (HoPF). PKs and FKs are subsets of the sets of unique column combinations (UCCs) and inclusion dependencies (INDs), respectively, for which efficient discovery algorithms are known. 
Using score functions, our approach is able to effectively extract the true PKs and FKs from the vast sets of valid UCCs and INDs. Several pruning rules are employed to speed up the procedure. We evaluate precision and recall on three benchmarks and two real-world datasets. The results show that our method is able to retrieve on average 88\% of all primary keys, and 91\% of all foreign keys. We compare the performance of HoPF with two baseline approaches that both assume the existence of primary keys.}, language = {en} } @phdthesis{Koumarelas2020, author = {Koumarelas, Ioannis}, title = {Data preparation and domain-agnostic duplicate detection}, doi = {10.25932/publishup-48913}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-489131}, school = {Universit{\"a}t Potsdam}, pages = {x, 97}, year = {2020}, abstract = {Successfully completing any data science project demands careful consideration across its whole process. Although the focus is often put on later phases of the process, in practice, experts spend more time in earlier phases, preparing data, to make them consistent with the systems' requirements or to improve their models' accuracies. Duplicate detection is typically applied during the data cleaning phase, which is dedicated to removing data inconsistencies and improving the overall quality and usability of data. While data cleaning involves a plethora of approaches to perform specific operations, such as schema alignment and data normalization, the task of detecting and removing duplicate records is particularly challenging. Duplicates arise when multiple records representing the same entities exist in a database, for numerous reasons spanning from simple typographical errors to the different schemas and formats of integrated databases. Keeping a database free of duplicates is crucial for most use cases, as their existence causes false negatives and false positives when matching queries against it. These two data quality issues have negative implications for tasks such as hotel booking, where users may erroneously select a wrong hotel, or parcel delivery, where a parcel can get delivered to the wrong address. Identifying the variety of possible data issues to eliminate duplicates demands sophisticated approaches. While research in duplicate detection is well-established and covers different aspects of both efficiency and effectiveness, our work in this thesis focuses on the latter. We propose novel approaches to improve data quality before duplicate detection takes place and apply the latter to datasets even when prior labeling is not available. Our experiments show that improving data quality upfront can increase duplicate classification results by up to 19\%. To this end, we propose two novel pipelines that select and apply generic as well as address-specific data preparation steps with the purpose of maximizing the success of duplicate detection. Generic data preparation, such as the removal of special characters, can be applied to any relation with alphanumeric attributes. When applied, data preparation steps are selected only for attributes where there are positive effects on pair similarities, which indirectly affect classification, or on classification directly. Our work on addresses is twofold; first, we consider more domain-specific approaches to improve the quality of values, and, second, we experiment with known and modified versions of similarity measures to select the most appropriate per address attribute, e.g., city or country.
To facilitate duplicate detection in applications where gold standard annotations are not available and obtaining them is not possible or too expensive, we propose MDedup. MDedup is a novel, rule-based, and fully automatic duplicate detection approach that is based on matching dependencies. These dependencies can be used to detect duplicates and can be discovered using state-of-the-art algorithms efficiently and without any prior labeling. MDedup uses two pipelines: it first trains on datasets with known labels, learning to identify useful matching dependencies, and is then applied to unseen datasets, regardless of any existing gold standard. Finally, our work is accompanied by open-source code to enable repeatability of our research results and application of our approaches to other datasets.}, language = {en} } @misc{SoechtingTrapp2020, author = {S{\"o}chting, Maximilian and Trapp, Matthias}, title = {Controlling image-stylization techniques using eye tracking}, series = {Postprints der Universit{\"a}t Potsdam : Reihe der Digital Engineering Fakult{\"a}t}, journal = {Postprints der Universit{\"a}t Potsdam : Reihe der Digital Engineering Fakult{\"a}t}, number = {7}, doi = {10.25932/publishup-52471}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-524717}, pages = {12}, year = {2020}, abstract = {With the spread of smartphones capable of taking high-resolution photos and the development of high-speed mobile data infrastructure, digital visual media is becoming one of the most important forms of modern communication. With this development, however, also comes a devaluation of images as a media form: the focus shifts to the frequency at which visual content is generated instead of the quality of the content. In this work, an interactive system using image-abstraction techniques and an eye tracking sensor is presented, which allows users to experience diverting and dynamic artworks that react to their eye movement. The underlying modular architecture enables a variety of different interaction techniques that share common design principles, making the interface as intuitive as possible. The resulting experience is a game-like interaction in which users aim for a reward, the artwork, while being held under constraints, e.g., not blinking. The conscious eye movements that are required by some interaction techniques hint at an interesting possible future extension of this work into the field of relaxation exercises and concentration training.}, language = {en} } @misc{ScheibelTrappLimbergeretal.2020, author = {Scheibel, Willy and Trapp, Matthias and Limberger, Daniel and D{\"o}llner, J{\"u}rgen Roland Friedrich}, title = {A taxonomy of treemap visualization techniques}, series = {Postprints der Universit{\"a}t Potsdam : Reihe der Digital Engineering Fakult{\"a}t}, journal = {Postprints der Universit{\"a}t Potsdam : Reihe der Digital Engineering Fakult{\"a}t}, number = {8}, doi = {10.25932/publishup-52469}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-524693}, pages = {10}, year = {2020}, abstract = {A treemap is a visualization that has been specifically designed to facilitate the exploration of tree-structured data and, more generally, hierarchically structured data. The family of visualization techniques that use a visual metaphor for parent-child relationships based "on the property of containment" (Johnson, 1993) is commonly referred to as treemaps.
However, as the number of variations of treemaps grows, it becomes increasingly important to distinguish clearly between techniques and their specific characteristics. This paper proposes to discern between Space-filling Treemap TS, Containment Treemap TC, Implicit Edge Representation Tree TIE, and Mapped Tree TMT for the classification of hierarchy visualization techniques and highlights their respective properties. This taxonomy is created as a hyponymy, i.e., its classes have an is-a relationship to one another: TS is-a TC is-a TIE is-a TMT. With this proposal, we intend to stimulate a discussion on a more unambiguous classification of treemaps and, furthermore, to broaden what is understood by the concept of treemap itself.}, language = {en} } @misc{RichlyBrauerSchlosser2020, author = {Richly, Keven and Brauer, Janos and Schlosser, Rainer}, title = {Predicting location probabilities of drivers to improve dispatch decisions of transportation network companies based on trajectory data}, series = {Postprints der Universit{\"a}t Potsdam : Reihe der Digital Engineering Fakult{\"a}t}, journal = {Postprints der Universit{\"a}t Potsdam : Reihe der Digital Engineering Fakult{\"a}t}, number = {9}, doi = {10.25932/publishup-52404}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-524040}, pages = {14}, year = {2020}, abstract = {The demand for peer-to-peer ridesharing services has increased rapidly over the last years. Dispatching orders cost-efficiently and communicating accurate pick-up times is challenging, as the current location of each available driver is not exactly known: observed locations can be outdated by several seconds. The developed trajectory visualization tool enables transportation network companies to analyze dispatch processes and determine the causes of unexpected delays. As dispatching algorithms depend on the accuracy of arrival time predictions, we account for factors like noise, sample rate, technical and economic limitations, as well as the duration of the entire process, as they all have an impact on the accuracy of spatio-temporal data. To improve dispatching strategies, we propose a prediction approach that provides a probability distribution for a driver's future locations based on patterns observed in past trajectories. We demonstrate the capabilities of our prediction results (i) to avoid critical delays, (ii) to estimate waiting times with higher confidence, and (iii) to enable risk considerations in dispatching strategies.}, language = {en} } @article{GalkaMoontahaSiniatchkin2020, author = {Galka, Andreas and Moontaha, Sidratul and Siniatchkin, Michael}, title = {Constrained expectation maximisation algorithm for estimating ARMA models in state space representation}, series = {EURASIP journal on advances in signal processing}, volume = {2020}, journal = {EURASIP journal on advances in signal processing}, number = {1}, publisher = {Springer}, address = {Heidelberg}, issn = {1687-6180}, doi = {10.1186/s13634-020-00678-3}, pages = {37}, year = {2020}, abstract = {This paper discusses the fitting of linear state space models to given multivariate time series in the presence of constraints imposed on the four main parameter matrices of these models. Constraints arise partly from the assumption that the models have a block-diagonal structure, with each block corresponding to an ARMA process, which allows the reconstruction of independent source components from linear mixtures, and partly from the need to keep models identifiable.
The first stage of parameter fitting is performed by the expectation maximisation (EM) algorithm. Due to the identifiability constraint, a subset of the diagonal elements of the dynamical noise covariance matrix needs to be constrained to fixed values (usually unity). For this kind of constraint, no closed-form update rules have been available so far. We present new update rules for this situation, both for updating the dynamical noise covariance matrix directly and for updating a matrix square-root of this matrix. The practical applicability of the proposed algorithm is demonstrated by a low-dimensional simulation example. The behaviour of the EM algorithm, as observed in this example, illustrates the well-known fact that in practical applications, the EM algorithm should be combined with a different algorithm for numerical optimisation, such as a quasi-Newton algorithm.}, language = {en} } @article{vanderAaLeopoldWeidlich2020, author = {van der Aa, Han and Leopold, Henrik and Weidlich, Matthias}, title = {Partial order resolution of event logs for process conformance checking}, series = {Decision support systems : DSS}, volume = {136}, journal = {Decision support systems : DSS}, publisher = {Elsevier}, address = {Amsterdam [u.a.]}, issn = {0167-9236}, doi = {10.1016/j.dss.2020.113347}, pages = {12}, year = {2020}, abstract = {While supporting the execution of business processes, information systems record event logs. Conformance checking relies on these logs to analyze whether the recorded behavior of a process conforms to the behavior of a normative specification. A key assumption of existing conformance checking techniques, however, is that all events are associated with timestamps that allow inferring a total order of events per process instance. Unfortunately, this assumption is often violated in practice. Due to synchronization issues, manual event recordings, or data corruption, events are only partially ordered. In this paper, we put forward the problem of partial order resolution of event logs to close this gap. It refers to the construction of a probability distribution over all possible total orders of events of an instance. To cope with the order uncertainty in real-world data, we present several estimators for this task, incorporating different notions of behavioral abstraction. Moreover, to reduce the runtime of conformance checking based on partial order resolution, we introduce an approximation method that comes with a bounded error in terms of accuracy. Our experiments with real-world and synthetic data reveal that our approach improves accuracy over the state-of-the-art considerably.}, language = {en} } @phdthesis{Risch2020, author = {Risch, Julian}, title = {Reader comment analysis on online news platforms}, doi = {10.25932/publishup-48922}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-489222}, school = {Universit{\"a}t Potsdam}, pages = {xi, 135}, year = {2020}, abstract = {Comment sections of online news platforms are an essential space to express opinions and discuss political topics. However, the misuse by spammers, haters, and trolls raises doubts about whether the benefits justify the costs of the time-consuming content moderation. As a consequence, many platforms limited or even shut down comment sections completely. In this thesis, we present deep learning approaches for comment classification, recommendation, and prediction to foster respectful and engaging online discussions.
The main focus is on two kinds of comments: toxic comments, which make readers leave a discussion, and engaging comments, which make readers join a discussion. First, we discourage and remove toxic comments, e.g., insults or threats. To this end, we present a semi-automatic comment moderation process, which is based on fine-grained text classification models and supports moderators. Our experiments demonstrate that data augmentation, transfer learning, and ensemble learning allow training robust classifiers even on small datasets. To establish trust in the machine-learned models, we reveal which input features are decisive for their output with attribution-based explanation methods. Second, we encourage and highlight engaging comments, e.g., serious questions or factual statements. We automatically identify the most engaging comments, so that readers need not scroll through thousands of comments to find them. The model training process builds on upvotes and replies as a measure of reader engagement. We also identify comments that address the article authors or are otherwise relevant to them to support interactions between journalists and their readership. Taking into account the readers' interests, we further provide personalized recommendations of discussions that align with their favored topics or involve frequent co-commenters. Our models outperform multiple baselines and recent related work in experiments on comment datasets from different platforms.}, language = {en} } @article{CaselDreierFernauetal.2020, author = {Casel, Katrin and Dreier, Jan and Fernau, Henning and Gobbert, Moritz and Kuinke, Philipp and Villaamil, Fernando S{\´a}nchez and Schmid, Markus L. and van Leeuwen, Erik Jan}, title = {Complexity of independency and cliquy trees}, series = {Discrete applied mathematics}, volume = {272}, journal = {Discrete applied mathematics}, publisher = {Elsevier}, address = {Amsterdam [u.a.]}, issn = {0166-218X}, doi = {10.1016/j.dam.2018.08.011}, pages = {2 -- 15}, year = {2020}, abstract = {An independency (cliquy) tree of an n-vertex graph G is a spanning tree of G in which the set of leaves induces an independent set (clique). We study the problems of minimizing or maximizing the number of leaves of such trees, and fully characterize their parameterized complexity. We show that all four variants of deciding if an independency/cliquy tree with at least/most l leaves exists parameterized by l are either Para-NP- or W[1]-hard. We prove that minimizing the number of leaves of a cliquy tree parameterized by the number of internal vertices is Para-NP-hard too. However, we show that minimizing the number of leaves of an independency tree parameterized by the number k of internal vertices has an O*(4^k)-time algorithm and a 2k-vertex kernel. Moreover, we prove that maximizing the number of leaves of an independency/cliquy tree parameterized by the number k of internal vertices both have an O*(18^k)-time algorithm and an O(k 2^k)-vertex kernel, but no polynomial kernel unless the polynomial hierarchy collapses to the third level. Finally, we present an O(3^n ·
f(n))-time algorithm to find a spanning tree where the leaf set has a property that can be decided in f(n) time and has minimum or maximum size.}, language = {en} } @article{LimanowskiLopesKecketal.2020, author = {Limanowski, Jakub and Lopes, Pedro and Keck, Janis and Baudisch, Patrick and Friston, Karl and Blankenburg, Felix}, title = {Action-dependent processing of touch in the human parietal operculum and posterior insula}, series = {Cerebral Cortex}, volume = {30}, journal = {Cerebral Cortex}, number = {2}, publisher = {Oxford University Press}, address = {Oxford}, issn = {1047-3211}, doi = {10.1093/cercor/bhz111}, pages = {607 -- 617}, year = {2020}, abstract = {Somatosensory input generated by one's actions (i.e., self-initiated body movements) is generally attenuated. Conversely, externally caused somatosensory input is enhanced, for example, during active touch and the haptic exploration of objects. Here, we used functional magnetic resonance imaging (fMRI) to ask how the brain accomplishes this delicate weighting of self-generated versus externally caused somatosensory components. Finger movements were either self-generated by our participants or induced by functional electrical stimulation (FES) of the same muscles. During half of the trials, electrotactile impulses were administered when the (actively or passively) moving finger reached a predefined flexion threshold. fMRI revealed an interaction effect in the contralateral posterior insular cortex (pIC), which responded more strongly to touch during self-generated than during FES-induced movements. A network analysis via dynamic causal modeling revealed that connectivity from the secondary somatosensory cortex via the pIC to the supplementary motor area was generally attenuated during self-generated relative to FES-induced movements - yet specifically enhanced by touch received during self-generated, but not FES-induced movements. Together, these results suggest a crucial role of the parietal operculum and the posterior insula in differentiating self-generated from externally caused somatosensory information received from one's moving limb.}, language = {en} } @article{TrillaDrimallaBajboujetal.2020, author = {Trilla, Irene and Drimalla, Hanna and Bajbouj, Malek and Dziobek, Isabel}, title = {The influence of reward on facial mimicry}, series = {Frontiers in behavioral neuroscience}, volume = {14}, journal = {Frontiers in behavioral neuroscience}, publisher = {Frontiers Media}, address = {Lausanne}, issn = {1662-5153}, doi = {10.3389/fnbeh.2020.00088}, pages = {12}, year = {2020}, abstract = {Recent findings suggest a role of oxytocin in the tendency to spontaneously mimic the emotional facial expressions of others. Oxytocin-related increases of facial mimicry, however, seem to be dependent on contextual factors. Given previous literature showing that people preferentially mimic emotional expressions of individuals associated with high (vs. low) rewards, we examined whether the reward value of the mimicked agent is one factor influencing the oxytocin effects on facial mimicry. To test this hypothesis, 60 male adults received 24 IU of either intranasal oxytocin or placebo in a double-blind, between-subject experiment. Next, the value of male neutral faces was manipulated using an associative learning task with monetary rewards. After the reward associations were learned, participants watched videos of the same faces displaying happy and angry expressions.
Facial reactions to the emotional expressions were measured with electromyography. We found that participants judged the face identities associated with high reward values as more pleasant than those associated with low reward values. However, happy expressions by low rewarding faces were more spontaneously mimicked than those by high rewarding faces. Contrary to our expectations, we did not find a significant direct effect of intranasal oxytocin on facial mimicry, nor on the reward-driven modulation of mimicry. Our results support the notion that mimicry is a complex process that depends on contextual factors, but failed to provide conclusive evidence of a role of oxytocin in the modulation of facial mimicry.}, language = {en} } @article{DoerrKoetzing2020, author = {Doerr, Benjamin and K{\"o}tzing, Timo}, title = {Multiplicative Up-Drift}, series = {Algorithmica}, volume = {83}, journal = {Algorithmica}, number = {10}, publisher = {Springer}, address = {New York}, issn = {0178-4617}, doi = {10.1007/s00453-020-00775-7}, pages = {3017 -- 3058}, year = {2020}, abstract = {Drift analysis aims at translating the expected progress of an evolutionary algorithm (or more generally, a random process) into a probabilistic guarantee on its run time (hitting time). So far, drift arguments have been successfully employed in the rigorous analysis of evolutionary algorithms, however, only for the situation that the progress is constant or becomes weaker when approaching the target. Motivated by questions like how fast fit individuals take over a population, we analyze random processes exhibiting a (1+delta)-multiplicative growth in expectation. We prove a drift theorem translating this expected progress into a hitting time. This drift theorem gives a simple and insightful proof of the level-based theorem first proposed by Lehre (2011). Our version of this theorem has, for the first time, the best-possible near-linear dependence on 1/delta (the previous results had an at least near-quadratic dependence), and it only requires a population size near-linear in delta (this was super-quadratic in previous results). These improvements immediately lead to stronger run time guarantees for a number of applications. We also discuss the case of large delta and show stronger results for this setting.}, language = {en} } @misc{KruseKaoudiContrerasRojasetal.2020, author = {Kruse, Sebastian and Kaoudi, Zoi and Contreras-Rojas, Bertty and Chawla, Sanjay and Naumann, Felix and Quian{\´e}-Ruiz, Jorge-Arnulfo}, title = {RHEEMix in the data jungle}, series = {Zweitver{\"o}ffentlichungen der Universit{\"a}t Potsdam : Reihe der Digital Engineering Fakult{\"a}t}, journal = {Zweitver{\"o}ffentlichungen der Universit{\"a}t Potsdam : Reihe der Digital Engineering Fakult{\"a}t}, number = {6}, doi = {10.25932/publishup-51944}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-519443}, pages = {26}, year = {2020}, abstract = {Data analytics are moving beyond the limits of a single platform. In this paper, we present the cost-based optimizer of Rheem, an open-source cross-platform system that copes with these new requirements. The optimizer allocates the subtasks of data analytic tasks to the most suitable platforms. Our main contributions are: (i) a mechanism based on graph transformations to explore alternative execution strategies; (ii) a novel graph-based approach to determine efficient data movement plans among subtasks and platforms; and (iii) an efficient plan enumeration algorithm, based on a novel enumeration algebra.
We extensively evaluate our optimizer under diverse real tasks. We show that our optimizer can perform tasks more than one order of magnitude faster when using multiple platforms than when using a single platform.}, language = {en} } @article{KruseKaoudiContrerasRojasetal.2020, author = {Kruse, Sebastian and Kaoudi, Zoi and Contreras-Rojas, Bertty and Chawla, Sanjay and Naumann, Felix and Quiane-Ruiz, Jorge-Arnulfo}, title = {RHEEMix in the data jungle}, series = {The VLDB Journal}, volume = {29}, journal = {The VLDB Journal}, number = {6}, publisher = {Springer}, address = {Berlin}, issn = {1066-8888}, doi = {10.1007/s00778-020-00612-x}, pages = {1287 -- 1310}, year = {2020}, abstract = {Data analytics are moving beyond the limits of a single platform. In this paper, we present the cost-based optimizer of Rheem, an open-source cross-platform system that copes with these new requirements. The optimizer allocates the subtasks of data analytic tasks to the most suitable platforms. Our main contributions are: (i) a mechanism based on graph transformations to explore alternative execution strategies; (ii) a novel graph-based approach to determine efficient data movement plans among subtasks and platforms; and (iii) an efficient plan enumeration algorithm, based on a novel enumeration algebra. We extensively evaluate our optimizer under diverse real tasks. We show that our optimizer can perform tasks more than one order of magnitude faster when using multiple platforms than when using a single platform.}, language = {en} } @book{MaximovaSchneiderGiese2020, author = {Maximova, Maria and Schneider, Sven and Giese, Holger}, title = {Compositional analysis of probabilistic timed graph transformation systems}, number = {133}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-501-9}, issn = {1613-5652}, doi = {10.25932/publishup-49013}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-490131}, pages = {53}, year = {2020}, abstract = {The analysis of behavioral models is of high importance for cyber-physical systems, as the systems often encompass complex behavior based on, e.g., concurrent components with mutual exclusion or probabilistic failures on demand. The rule-based formalism of probabilistic timed graph transformation systems (PTGTSs) is a suitable choice when the models representing states of the system can be understood as graphs and timed and probabilistic behavior is important. However, model checking PTGTSs is limited to systems with rather small state spaces. We present an approach for the analysis of large-scale systems modeled as probabilistic timed graph transformation systems by systematically decomposing their state spaces into manageable fragments. To obtain qualitative and quantitative analysis results for a large-scale system, we verify that results obtained for its fragments serve as overapproximations for the corresponding results of the large-scale system. Hence, our approach allows for the detection of violations of qualitative and quantitative safety properties for the large-scale system under analysis. We consider a running example in which we model shuttles driving on tracks of a large-scale topology and for which we verify that shuttles never collide and are unlikely to execute emergency brakes.
In our evaluation, we apply an implementation of our approach to the running example.}, language = {en} } @phdthesis{Staubitz2020, author = {Staubitz, Thomas}, title = {Gradable team assignments in large scale learning environments}, doi = {10.25932/publishup-47183}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-471830}, school = {Universit{\"a}t Potsdam}, pages = {122}, year = {2020}, abstract = {Lifelong learning plays an increasingly important role in many societies. Technology is changing faster than ever, and what is important to learn today may be obsolete tomorrow. The role of informal programs is becoming increasingly important. In particular, Massive Open Online Courses have become popular among learners and instructors. In 2008, a group of Canadian education enthusiasts started the first Massive Open Online Courses, or MOOCs, to prove their cognitive theory of Connectivism. Around 2012, a variety of American start-ups redefined the concept of MOOCs. Instead of following the connectivist doctrine, they returned to a more traditional approach. They focussed on video lecturing and combined this with a course forum that allowed the participants to discuss with each other and the teaching team. While this new version of the concept was enormously successful in terms of massiveness—hundreds of thousands of participants from all over the world joined the first of these courses—many educators criticized the relapse to the cognitivist model. In the early days, the evolving platforms often did not have more features than a video player, simple multiple-choice quizzes, and the course forum. It soon became a major interest of research to allow the scaling of more modern approaches to learning and teaching for the massiveness of these courses. Hands-on exercises, alternative forms of assessment, collaboration, and teamwork are some of the topics on the agenda. The insights provided by cognitive and pedagogical theories, however, do not necessarily always run in sync with the needs and the preferences of the majority of participants. While the former promote action-learning, hands-on-learning, competence-based-learning, project-based-learning, team-based-learning as the holy grail, many of the latter often rather prefer a more laid-back style of learning, sometimes referred to as edutainment. Obviously, given the large numbers of participants in these courses, there is not just one type of learner. Participants are not a homogeneous mass but a potpourri of individuals with a wildly heterogeneous mix of backgrounds, previous knowledge, familial and professional circumstances, countries of origin, gender, age, and so on. For the majority of participants, a full-time job and/or a family often just does not leave enough room for more time-intensive tasks, such as practical exercises or teamwork. Others, however, particularly enjoy these hands-on or collaborative aspects of MOOCs. Furthermore, many subjects particularly require these possibilities and simply cannot be taught or learned in courses that lack collaborative or hands-on features. In this context, the thesis discusses how team assignments have been implemented on the HPI MOOC platform.
During recent years, several experiments have been conducted and a great amount of experience has been gained by employing team assignments in courses in areas such as Object-Oriented Programming, Design Thinking, and Business Innovation on various instances of this platform: openHPI, openSAP, and mooc.house.}, language = {en} } @phdthesis{Renz2020, author = {Renz, Jan}, title = {Lebensbegleitendes Lernen in einer digitalen Welt}, doi = {10.25932/publishup-47257}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-472573}, school = {Universit{\"a}t Potsdam}, pages = {vii, 184}, year = {2020}, abstract = {In our digitized world, learning is moving into the cloud: from classroom teaching at the blackboard to the tablet, and on to lifelong learning in the working world and even beyond. How successful and attractive this contemporary way of learning is depends to no small extent on the technological possibilities that digital learning platforms around MOOCs and school clouds offer. In their further development, the learners and their learning experiences should take center stage rather than economic metrics and KPIs. To this end, an optimization framework was developed that identifies and prioritizes improvements for the development of learning platforms by means of various qualitative and quantitative methods and that steers their assessment and implementation. Data-driven decisions should be based on a sufficient data foundation, but modern web applications often consist of several microservices, each with its own data storage, so much of the data is no longer easily accessible. This thesis therefore introduces a learning analytics service that collects and processes these data. Building on this, metrics are introduced that make the collected data usable for a variety of purposes. Besides visualizing the data in dashboards, the data are used for automated quality control, for example to detect when tests are too difficult or when the social interaction in a MOOC is too low. The presented infrastructure can also be used to conduct A/B/n tests, in which several variants are tried out on different user groups in a controlled experiment. Thanks to the presented test infrastructure, which was built into the HPI MOOC platform, it can be determined whether statistically significant changes in usage can be observed for these groups. This was evaluated with five different improvements of the HPI MOOC platform, on which openHPI and openSAP are also based. It was shown that learners can be brought back into a course by reactivating e-mails; it is primarily the communication of the user's unfinished learning content that has a reactivating effect. Digest e-mails summarizing forum activity also achieved a positive effect. Targeted onboarding can lead to users understanding the platform better and thus being more active. The fourth test showed that linking forum questions to a specific point in time in a video and displaying this information graphically leads to increased forum activity.
Experimenting with different learning materials, as done in the fifth test, is likewise helpful in MOOCs for improving the course materials. Beyond these functional improvements, the thesis investigates how MOOC platforms and school clouds can still provide value when users only have a weak or unreliable internet connection (as is the case in many German schools). It is shown that clever pre-loading of data can relieve the internet connection, and thanks to these adaptations parts of the learning applications work even when there is no connection to the internet. Finally, it is shown how end devices can supply each other with data in a local peer-to-peer CDN without the data having to be downloaded from the internet.}, language = {de} } @phdthesis{Sianipar2020, author = {Sianipar, Johannes Harungguan}, title = {Towards scalable and secure virtual laboratory for cybersecurity e-learning}, doi = {10.25932/publishup-50279}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-502793}, school = {Universit{\"a}t Potsdam}, pages = {xviii, 156}, year = {2020}, abstract = {A distance education or e-learning platform should be able to provide a virtual laboratory that lets participants gain hands-on exercise experience and practice their skills remotely. This holds especially for cybersecurity e-learning, where participants need to be able to attack or defend an IT system. For hands-on exercises, the virtual laboratory environment must be similar to the real operational environment: an attacker or a victim is represented by a node in the virtual laboratory, and a node is usually realized as a Virtual Machine (VM). Scalability has become a primary issue in virtual laboratories for cybersecurity e-learning because a VM needs a significant and fixed allocation of resources, and the available resources limit the number of simultaneous users. Scalability, i.e., the number of simultaneous users, can be increased by using the available resources more efficiently and by providing more resources. In this thesis, we propose two approaches to increase the efficiency of using the available resources. The first approach replaces VMs with containers wherever possible (see the sketch below). The second approach shares the load with the user's on-premise machine, which then represents one of the nodes in a virtual laboratory scenario. We also propose two approaches for providing more resources: using public cloud services, and gathering resources from the crowd, which we refer to as the Crowdresourcing Virtual Laboratory (CRVL). In the CRVL, the crowd can contribute unused resources in the form of a VM, a bare-metal system, an account in a public cloud, a private cloud, or an isolated group of VMs; in this thesis, we focus on VMs. The contributor must hand the credentials of the VM admin or root user to the CRVL system. We propose an architecture and methods to integrate VMs into, and remove them from, the CRVL system automatically. A team placement algorithm is also investigated to optimize resource usage while giving the best service to the users.
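As a hedged illustration of the first efficiency approach mentioned above, the sketch below starts a lab node as a resource-capped container through the Docker SDK for Python; the image name and network name are hypothetical placeholders and not artifacts of the thesis.

```python
# Hedged sketch: run an exercise node as a container instead of a full VM,
# with explicit, fine-grained resource limits.
import docker

client = docker.from_env()
node = client.containers.run(
    "tele-lab/attacker-node:latest",  # hypothetical exercise image
    detach=True,
    mem_limit="512m",                 # memory cap for this node
    nano_cpus=500_000_000,            # 0.5 CPU
    network="lab-scenario-net",       # hypothetical isolated per-scenario network
)
print(node.short_id, node.status)
```

Unlike a VM with a fixed resource allocation, such limits can be set per node and adjusted, which is one reason containers can raise the number of simultaneous users.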
Because the CRVL system does not manage the contributor's host machine, it must be able to ensure that the VM integration will not harm that machine and that the training material is stored securely on the contributor's side, so that no one can take the training material away without permission. We investigate ways to handle these kinds of threats and propose three approaches to protect the VM against a malicious host admin. To verify the integrity of a VM before its integration into the CRVL system, we propose a remote verification method that does not require additional hardware such as a Trusted Platform Module chip. As the owner of the host machine, a host admin could access the VM's data in Random Access Memory (RAM) by performing live memory dumping or Spectre and Meltdown attacks. To make it harder for a malicious host admin to obtain sensitive data from RAM, we propose a method that continually moves sensitive data around in RAM. We also propose monitoring the host machine by installing an agent on it; the agent monitors the hypervisor configuration and the host admin's activities. To evaluate our approaches, we conduct extensive experiments with different settings. Our use case is Tele-Lab, a virtual laboratory platform for cybersecurity e-learning, which we use as the basis for designing and developing our approaches. The results show that our approaches are practical and provide enhanced security.}, language = {en} } @book{BeinBraunDaaseetal.2020, author = {Bein, Leon and Braun, Tom and Daase, Bj{\"o}rn and Emsbach, Elina and Matthes, Leon and Stiede, Maximilian and Taeumel, Marcel and Mattis, Toni and Ramson, Stefan and Rein, Patrick and Hirschfeld, Robert and M{\"o}nig, Jens}, title = {SandBlocks}, number = {132}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-482-1}, issn = {1613-5652}, doi = {10.25932/publishup-43926}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-439263}, publisher = {Universit{\"a}t Potsdam}, pages = {viii, 212}, year = {2020}, abstract = {Visual programming languages are hardly used today, in favor of textual programming languages, even though visual programming languages offer several advantages. These range from the avoidance of syntax errors, through the use of concrete domain-specific notation, to better readability and maintainability of the program. Nevertheless, professional software developers rely almost exclusively on textual programming languages. So that developers can benefit from these advantages of visual programming languages without having to give up the textual programming languages they are familiar with, there is the idea of making textual and visual program elements usable together in one programming language. It is then up to the developer when and how to use visual elements in their program code. This work presents the SandBlocks framework, which enables this combined use of visual and textual program elements.
Beyond an evaluation of visual programming languages, it presents the technical integration of visual program elements into the Squeak/Smalltalk system, gives insights into their implementation and use in live programming systems, and discusses their use in different domains.}, language = {de} } @phdthesis{ChujfiLaRoche2020, author = {Chujfi-La-Roche, Salim}, title = {Human Cognition and Natural Language Processing in the Digitally Mediated Environment}, school = {Universit{\"a}t Potsdam}, pages = {148}, year = {2020}, abstract = {Organizations continue to assemble and rely upon teams of remote workers as an essential element of their business strategy; however, knowledge processing is particularly difficult in such isolated, largely digitally mediated settings. The great challenge for a knowledge-based organization lies not in how individuals should interact using technology but in how to achieve effective cooperation and knowledge exchange. To date, more attention has been paid to technology and to the difficulties machines have in processing natural language, and less to the human aspect: the influence of our own individual cognitive abilities and preferences on the processing of information when interacting online. This thesis draws on four scientific domains involved in the process of interpreting and processing massive, unstructured data (knowledge management, linguistics, cognitive science, and artificial intelligence) to build a model that offers a reliable way to address the ambiguous nature of language and improve workers' digitally mediated interactions. Human communication can be discouragingly imprecise and is characterized by strong linguistic ambiguity; this represents an enormous challenge for the computer analysis of natural language. In this thesis, I propose and develop a new data interpretation layer for the processing of natural language based on the human cognitive preferences of the conversants themselves. Such a semantic analysis merges information derived both from the content and from the associated social and individual contexts, as well as the social dynamics that emerge online. At the same time, assessment taxonomies are used to analyze online comportment at the individual and community level in order to successfully identify characteristics leading to greater effectiveness of communication. Measurement patterns for identifying effective methods of individual interaction with regard to individual cognitive and learning preferences are also evaluated; a novel Cyber-Cognitive Identity (CCI), a perceptual profile of an individual's cognitive and learning styles, is proposed. Accommodation of such cognitive preferences can greatly facilitate knowledge management in the geographically dispersed and collaborative digital environment. Use of the CCI is proposed for cognitively labeled Latent Dirichlet Allocation (CLLDA), a novel method for automatically labeling and clustering knowledge that does not rely solely on probabilistic methods, but rather on a fusion of machine learning algorithms and the cognitive identities of the individuals interacting in a digitally mediated environment (see the baseline sketch below).
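A hedged baseline sketch of the probabilistic part only: standard Latent Dirichlet Allocation over a toy corpus using gensim. The cognitive labeling and the fusion with Cyber-Cognitive Identities that distinguish CLLDA are the thesis's own contribution and are not reproduced here; the corpus, topic count, and hyperparameters are illustrative assumptions.

```python
# Hedged sketch: plain LDA topic clustering, the baseline that CLLDA builds on.
from gensim import corpora, models

docs = [["remote", "team", "knowledge", "exchange"],      # toy tokenized contributions
        ["cognitive", "style", "learning", "preference"],
        ["team", "knowledge", "cluster", "topic"]]

dictionary = corpora.Dictionary(docs)
bow = [dictionary.doc2bow(d) for d in docs]               # bag-of-words corpus
lda = models.LdaModel(bow, num_topics=2, id2word=dictionary, passes=10, random_state=1)
for topic_id, terms in lda.print_topics():
    print(topic_id, terms)
```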
Advantages of this cognitively labeled approach include greater perspicuity of dynamic and meaningful cognitive rules, leading to higher tagging accuracy, and greater content portability at the sentence, document, and corpus level in digital communication.}, language = {en} } @article{RezaeiYangMeinel2020, author = {Rezaei, Mina and Yang, Haojin and Meinel, Christoph}, title = {Recurrent generative adversarial network for learning imbalanced medical image semantic segmentation}, series = {Multimedia tools and applications : an international journal}, volume = {79}, journal = {Multimedia tools and applications : an international journal}, number = {21-22}, publisher = {Springer}, address = {Dordrecht}, issn = {1380-7501}, doi = {10.1007/s11042-019-7305-1}, pages = {15329 -- 15348}, year = {2020}, abstract = {We propose a new recurrent generative adversarial architecture named RNN-GAN to mitigate the imbalanced-data problem in medical image semantic segmentation, where the number of pixels belonging to the desired object is significantly lower than the number belonging to the background. A model trained with imbalanced data tends to be biased towards healthy data, which is not desired in clinical applications, and the outputs predicted by such networks have high precision but low recall. To mitigate the impact of imbalanced training data, we train the RNN-GAN with the proposed complementary segmentation masks in addition to the ordinary segmentation masks. The RNN-GAN consists of two components: a generator and a discriminator. The generator is trained on sequences of medical images to learn the corresponding segmentation label map plus the proposed complementary label, both at the pixel level, while the discriminator is trained to distinguish whether a segmentation image comes from the ground truth or from the generator network. Both the generator and the discriminator are equipped with bidirectional LSTM units to enhance temporal consistency and to obtain inter- and intra-slice representations of the features. We show evidence that the proposed framework is applicable to different types of medical images of varied sizes. In our experiments on the ACDC-2017, HVSMR-2016, and LiTS-2017 benchmarks we find consistently improved results, demonstrating the efficacy of our approach.}, language = {en} } @article{BinTareafBergerHennigetal.2020, author = {Bin Tareaf, Raad and Berger, Philipp and Hennig, Patrick and Meinel, Christoph}, title = {Cross-platform personality exploration system for online social networks}, series = {Web intelligence}, volume = {18}, journal = {Web intelligence}, number = {1}, publisher = {IOS Press}, address = {Amsterdam}, issn = {2405-6456}, doi = {10.3233/WEB-200427}, pages = {35 -- 51}, year = {2020}, abstract = {Social networking sites (SNS) are a rich source of latent information about individual characteristics. Crawling and analyzing this content provides a new approach for enterprises to personalize services and put forward product recommendations. In the past few years, commercial brands have made a gradual appearance on social media platforms for advertising, customer support, and public relations purposes, and by now such a presence has become a necessity across all industries. This online identity can be represented as a brand personality that reflects how a brand is perceived by its customers. We exploited recent research in text analysis and personality detection to build an automatic brand personality prediction model on top of Five-Factor Model and Linguistic Inquiry and Word Count (LIWC) features extracted from publicly available benchmarks, as sketched below.
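A hedged sketch of the general shape of such a model: a regression from LIWC-style word-category frequencies to one Five-Factor trait score. The features, targets, and the choice of ridge regression are illustrative assumptions; the entry's actual model and benchmark data are not reproduced.

```python
# Hedged sketch: predict one synthetic personality trait from synthetic
# LIWC-style features and report the cross-validated fit.
import numpy as np
from sklearn.linear_model import Ridge
from sklearn.model_selection import cross_val_score

rng = np.random.default_rng(0)
X = rng.random((200, 30))                      # 200 posts x 30 word-category frequencies
w = rng.random(30)
y = X @ w + 0.1 * rng.standard_normal(200)     # synthetic trait score, e.g. openness

scores = cross_val_score(Ridge(alpha=1.0), X, y, cv=5, scoring="r2")
print(f"mean cross-validated R^2: {scores.mean():.2f}")
```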
Predictive evaluation on brands' accounts reveals that the Facebook platform has a slight advantage over the Twitter platform in offering more self-disclosure, letting users express their emotions and, in particular, their demographic and psychological traits. The results also confirm the wider view that the same social media account carries quite similar and comparable personality scores across different social media platforms. To evaluate our prediction results on actual brands' accounts, we crawled the Facebook and Twitter APIs for 100k posts from the most valuable brands' pages in the USA; we visualize exemplary comparison results and present suggestions for future directions.}, language = {en} } @book{ZhangPlauthEberhardtetal.2020, author = {Zhang, Shuhao and Plauth, Max and Eberhardt, Felix and Polze, Andreas and Lehmann, Jens and Sejdiu, Gezim and Jabeen, Hajira and Servadei, Lorenzo and M{\"o}stl, Christian and B{\"a}r, Florian and Netzeband, Andr{\'e} and Schmidt, Rainer and Knigge, Marlene and Hecht, Sonja and Prifti, Loina and Krcmar, Helmut and Sapegin, Andrey and Jaeger, David and Cheng, Feng and Meinel, Christoph and Friedrich, Tobias and Rothenberger, Ralf and Sutton, Andrew M. and Sidorova, Julia A. and Lundberg, Lars and Rosander, Oliver and Sk{\"o}ld, Lars and Di Varano, Igor and van der Walt, Est{\'e}e and Eloff, Jan H. P. and Fabian, Benjamin and Baumann, Annika and Ermakova, Tatiana and Kelkel, Stefan and Choudhary, Yash and Cooray, Thilini and Rodr{\'i}guez, Jorge and Medina-P{\'e}rez, Miguel Angel and Trejo, Luis A. and Barrera-Animas, Ari Yair and Monroy-Borja, Ra{\'u}l and L{\'o}pez-Cuevas, Armando and Ram{\'i}rez-M{\'a}rquez, Jos{\'e} Emmanuel and Grohmann, Maria and Niederleithinger, Ernst and Podapati, Sasidhar and Schmidt, Christopher and Huegle, Johannes and de Oliveira, Roberto C. L. and Soares, F{\'a}bio Mendes and van Hoorn, Andr{\'e} and Neumer, Tamas and Willnecker, Felix and Wilhelm, Mathias and Kuster, Bernhard}, title = {HPI Future SOC Lab - Proceedings 2017}, number = {130}, editor = {Meinel, Christoph and Polze, Andreas and Beins, Karsten and Strotmann, Rolf and Seibold, Ulrich and R{\"o}dszus, Kurt and M{\"u}ller, J{\"u}rgen}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-475-3}, issn = {1613-5652}, doi = {10.25932/publishup-43310}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-433100}, publisher = {Universit{\"a}t Potsdam}, pages = {ix, 235}, year = {2020}, abstract = {The "HPI Future SOC Lab" is a cooperation of the Hasso Plattner Institute (HPI) and industry partners. Its mission is to enable and promote exchange and interaction between the research community and the industry partners. The HPI Future SOC Lab provides researchers with free-of-charge access to a complete infrastructure of state-of-the-art hardware and software. This infrastructure includes components which might be too expensive for an ordinary research environment, such as servers with up to 64 cores and 2 TB main memory. The offerings address researchers, particularly but not exclusively, from the areas of computer science and business information systems. Main areas of research include cloud computing, parallelization, and in-memory technologies. This technical report presents results of research projects executed in 2017.
Selected projects presented their results on April 25th and November 15th, 2017, at the Future SOC Lab Day events.}, language = {en} } @misc{LewkowiczWohlbrandtBoettinger2020, author = {Lewkowicz, Daniel and Wohlbrandt, Attila and B{\"o}ttinger, Erwin}, title = {Economic impact of clinical decision support interventions based on electronic health records}, series = {Postprints der Universit{\"a}t Potsdam : Reihe der Digital Engineering Fakult{\"a}t}, journal = {Postprints der Universit{\"a}t Potsdam : Reihe der Digital Engineering Fakult{\"a}t}, number = {5}, doi = {10.25932/publishup-50137}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-501376}, pages = {14}, year = {2020}, abstract = {Background: Unnecessary healthcare utilization, non-adherence to current clinical guidelines, and insufficient personalized care are perpetual challenges and remain potential major cost drivers for healthcare systems around the world. Implementing decision support systems in clinical care promises to improve the quality of care and thereby yield substantial effects on reducing healthcare expenditure. In this article, we evaluate the economic impact of clinical decision support (CDS) interventions based on electronic health records (EHR). Methods: We searched for studies published after 2014 using the MEDLINE, CENTRAL, WEB OF SCIENCE, EBSCO, and TUFTS CEA registry databases that encompass an economic evaluation or consider cost outcome measures of EHR-based CDS interventions. Thereupon, we identified best-practice application areas and categorized the investigated interventions according to an existing taxonomy of front-end CDS tools. Results and discussion: Twenty-seven studies are investigated in this review. Of those, twenty-two studies indicate a reduction of healthcare expenditure after implementing an EHR-based CDS system, especially in prevalent application areas such as unnecessary laboratory testing, duplicate order entry, efficient transfusion practice, and reduction of antibiotic prescriptions. On the other hand, order facilitators and undiscovered malfunctions turned out to be threats and could lead to new cost drivers in healthcare. While the high upfront and maintenance costs of CDS systems are a worldwide implementation barrier, most studies do not consider implementation costs. Finally, four included economic evaluation studies report mixed monetary outcome results and thus highlight the importance of further high-quality economic evaluations for these CDS systems. Conclusion: Current research studies lack consideration of comparative cost-outcome metrics as well as detailed cost components in their analyses. Nonetheless, the positive economic impact of EHR-based CDS interventions is highly promising, especially with regard to reducing waste in healthcare.}, language = {en} } @article{PiroDadiSeileretal.2020, author = {Piro, Vitor C. and Dadi, Temesgen H. and Seiler, Enrico and Reinert, Knut and Renard, Bernhard Y.}, title = {ganon}, series = {Bioinformatics}, volume = {36}, journal = {Bioinformatics}, publisher = {Oxford Univ. Press}, address = {Oxford}, issn = {1367-4811}, doi = {10.1093/bioinformatics/btaa458}, pages = {12 -- 20}, year = {2020}, abstract = {Motivation: The exponential growth of assembled genome sequences greatly benefits metagenomics studies. However, currently available methods struggle to manage the increasing amount of sequences and their frequent updates. Indexing the current RefSeq can take days and hundreds of GB of memory on large servers.
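As a toy illustration of the k-mer indexing idea that ganon, described in this entry, scales up with Interleaved Bloom Filters: the hedged sketch below stores reference k-mers in a single Bloom filter and counts matching k-mers per read. The filter size, hash scheme, k, and sequences are illustrative assumptions, not ganon's implementation.

```python
# Hedged sketch: index reference k-mers in a Bloom filter, then count how
# many k-mers of a read hit the index (the basis of k-mer classification).
import hashlib

K = 19  # k-mer length -- illustrative

class BloomFilter:
    def __init__(self, size=1 << 20, hashes=3):
        self.size, self.hashes, self.bits = size, hashes, bytearray(size // 8)

    def _positions(self, item):
        for i in range(self.hashes):
            digest = hashlib.sha256(f"{i}:{item}".encode()).digest()
            yield int.from_bytes(digest[:8], "big") % self.size

    def add(self, item):
        for p in self._positions(item):
            self.bits[p // 8] |= 1 << (p % 8)

    def __contains__(self, item):
        return all(self.bits[p // 8] & (1 << (p % 8)) for p in self._positions(item))

def kmers(seq, k=K):
    return (seq[i:i + k] for i in range(len(seq) - k + 1))

reference = "ACGTACGTGGCTAGCTAGCTACGATCGATCGTAGCTAGCT"
index = BloomFilter()
for km in kmers(reference):
    index.add(km)

read = "GGCTAGCTAGCTACGATCG"
matches = sum(km in index for km in kmers(read))
print(f"{matches} of {len(read) - K + 1} read k-mers hit the reference index")
```

An Interleaved Bloom Filter extends this by interleaving one bit slice per reference cluster, so a single lookup answers membership for all clusters at once; the toy version above uses one filter for one reference.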
Few methods address these issues thus far, and even though many can theoretically handle large amounts of references, time/memory requirements are prohibitive in practice. As a result, many studies that require sequence classification use often outdated and almost never truly up-to-date indices. Results: Motivated by those limitations, we created ganon, a k-mer-based read classification tool that uses Interleaved Bloom Filters in conjunction with a taxonomic clustering and a k-mer counting/filtering scheme. Ganon provides an efficient method for indexing references, keeping them updated. It requires <55 min to index the complete RefSeq of bacteria, archaea, fungi and viruses. The tool can further keep these indices up-to-date in a fraction of the time necessary to create them. Ganon makes it possible to query against very large reference sets and therefore it classifies significantly more reads and identifies more species than similar methods. When classifying a high-complexity CAMI challenge dataset against complete genomes from RefSeq, ganon shows strongly increased precision with equal or better sensitivity compared with state-of-the-art tools. With the same dataset against the complete RefSeq, ganon improved the F1-score by 65\% at the genus level. It supports taxonomy- and assembly-level classification, multiple indices and hierarchical classification.}, language = {en} } @misc{GorskiJungLietal.2020, author = {Gorski, Mathias and Jung, Bettina and Li, Yong and Matias-Garcia, Pamela R. and Wuttke, Matthias and Coassin, Stefan and Thio, Chris H. L. and Kleber, Marcus E. and Winkler, Thomas W. and Wanner, Veronika and Chai, Jin-Fang and Chu, Audrey Y. and Cocca, Massimiliano and Feitosa, Mary F. and Ghasemi, Sahar and Hoppmann, Anselm and Horn, Katrin and Li, Man and Nutile, Teresa and Scholz, Markus and Sieber, Karsten B. and Teumer, Alexander and Tin, Adrienne and Wang, Judy and Tayo, Bamidele O. and Ahluwalia, Tarunveer S. and Almgren, Peter and Bakker, Stephan J. L. and Banas, Bernhard and Bansal, Nisha and Biggs, Mary L. and Boerwinkle, Eric and B{\"o}ttinger, Erwin and Brenner, Hermann and Carroll, Robert J. and Chalmers, John and Chee, Miao-Li and Chee, Miao-Ling and Cheng, Ching-Yu and Coresh, Josef and de Borst, Martin H. and Degenhardt, Frauke and Eckardt, Kai-Uwe and Endlich, Karlhans and Franke, Andre and Freitag-Wolf, Sandra and Gampawar, Piyush and Gansevoort, Ron T. and Ghanbari, Mohsen and Gieger, Christian and Hamet, Pavel and Ho, Kevin and Hofer, Edith and Holleczek, Bernd and Foo, Valencia Hui Xian and Hutri-Kahonen, Nina and Hwang, Shih-Jen and Ikram, M. Arfan and Josyula, Navya Shilpa and Kahonen, Mika and Khor, Chiea-Chuen and Koenig, Wolfgang and Kramer, Holly and Kraemer, Bernhard K. and Kuehnel, Brigitte and Lange, Leslie A. and Lehtimaki, Terho and Lieb, Wolfgang and Loos, Ruth J. F. and Lukas, Mary Ann and Lyytikainen, Leo-Pekka and Meisinger, Christa and Meitinger, Thomas and Melander, Olle and Milaneschi, Yuri and Mishra, Pashupati P. and Mononen, Nina and Mychaleckyj, Josyf C. and Nadkarni, Girish N. and Nauck, Matthias and Nikus, Kjell and Ning, Boting and Nolte, Ilja M. and O'Donoghue, Michelle L. and Orho-Melander, Marju and Pendergrass, Sarah A. and Penninx, Brenda W. J. H. and Preuss, Michael H. and Psaty, Bruce M. and Raffield, Laura M. and Raitakari, Olli T. and Rettig, Rainer and Rheinberger, Myriam and Rice, Kenneth M. and Rosenkranz, Alexander R. 
and Rossing, Peter and Rotter, Jerome and Sabanayagam, Charumathi and Schmidt, Helena and Schmidt, Reinhold and Schoettker, Ben and Schulz, Christina-Alexandra and Sedaghat, Sanaz and Shaffer, Christian M. and Strauch, Konstantin and Szymczak, Silke and Taylor, Kent D. and Tremblay, Johanne and Chaker, Layal and van der Harst, Pim and van der Most, Peter J. and Verweij, Niek and Voelker, Uwe and Waldenberger, Melanie and Wallentin, Lars and Waterworth, Dawn M. and White, Harvey D. and Wilson, James G. and Wong, Tien-Yin and Woodward, Mark and Yang, Qiong and Yasuda, Masayuki and Yerges-Armstrong, Laura M. and Zhang, Yan and Snieder, Harold and Wanner, Christoph and Boger, Carsten A. and Kottgen, Anna and Kronenberg, Florian and Pattaro, Cristian and Heid, Iris M.}, title = {Meta-analysis uncovers genome-wide significant variants for rapid kidney function decline}, series = {Zweitver{\"o}ffentlichungen der Universit{\"a}t Potsdam : Reihe der Digital Engineering Fakult{\"a}t}, journal = {Zweitver{\"o}ffentlichungen der Universit{\"a}t Potsdam : Reihe der Digital Engineering Fakult{\"a}t}, number = {19}, doi = {10.25932/publishup-56537}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-565379}, pages = {14}, year = {2020}, abstract = {Rapid decline of glomerular filtration rate estimated from creatinine (eGFRcrea) is associated with severe clinical endpoints. In contrast to cross-sectionally assessed eGFRcrea, the genetic basis for rapid eGFRcrea decline is largely unknown. To help define this, we meta-analyzed 42 genome-wide association studies from the Chronic Kidney Diseases Genetics Consortium and United Kingdom Biobank to identify genetic loci for rapid eGFRcrea decline. Two definitions of eGFRcrea decline were used: a decline of 3 mL/min/1.73m²/year or more ("Rapid3"; encompassing 34,874 cases, 107,090 controls) and a decline of 25\% or more with eGFRcrea under 60 mL/min/1.73m² at follow-up among those with eGFRcrea of 60 mL/min/1.73m² or more at baseline ("CKDi25"; encompassing 19,901 cases, 175,244 controls). Seven independent variants were identified across six loci for Rapid3 and/or CKDi25, consisting of five variants at four loci with genome-wide significance (near UMOD-PDILT (two variants), PRKAG2, WDR72, and OR2S2) and two variants among 265 known eGFRcrea variants (near GATM and LARP4B). All of these loci were novel for Rapid3 and/or CKDi25, and our bioinformatic follow-up prioritized variants and genes underneath these loci. The OR2S2 locus is novel for any eGFRcrea trait and includes interesting candidate genes. For the five genome-wide significant lead variants, we found supporting effects for annual change in blood urea nitrogen or cystatin-based eGFR, but not for GATM or LARP4B. Individuals at high genetic risk (8-14 adverse alleles) compared to those at low genetic risk (0-5 adverse alleles) had a 1.20-fold increased risk of acute kidney injury (95\% confidence interval 1.08-1.33). Thus, our identified loci for rapid kidney function decline may help prioritize therapeutic targets and identify mechanisms and individuals at risk for sustained deterioration of kidney function.}, language = {en} } @article{GorskiJungLietal.2020, author = {Gorski, Mathias and Jung, Bettina and Li, Yong and Matias-Garcia, Pamela R. and Wuttke, Matthias and Coassin, Stefan and Thio, Chris H. L. and Kleber, Marcus E. and Winkler, Thomas W. and Wanner, Veronika and Chai, Jin-Fang and Chu, Audrey Y. and Cocca, Massimiliano and Feitosa, Mary F.
and Ghasemi, Sahar and Hoppmann, Anselm and Horn, Katrin and Li, Man and Nutile, Teresa and Scholz, Markus and Sieber, Karsten B. and Teumer, Alexander and Tin, Adrienne and Wang, Judy and Tayo, Bamidele O. and Ahluwalia, Tarunveer S. and Almgren, Peter and Bakker, Stephan J. L. and Banas, Bernhard and Bansal, Nisha and Biggs, Mary L. and Boerwinkle, Eric and B{\"o}ttinger, Erwin and Brenner, Hermann and Carroll, Robert J. and Chalmers, John and Chee, Miao-Li and Chee, Miao-Ling and Cheng, Ching-Yu and Coresh, Josef and de Borst, Martin H. and Degenhardt, Frauke and Eckardt, Kai-Uwe and Endlich, Karlhans and Franke, Andre and Freitag-Wolf, Sandra and Gampawar, Piyush and Gansevoort, Ron T. and Ghanbari, Mohsen and Gieger, Christian and Hamet, Pavel and Ho, Kevin and Hofer, Edith and Holleczek, Bernd and Foo, Valencia Hui Xian and Hutri-Kahonen, Nina and Hwang, Shih-Jen and Ikram, M. Arfan and Josyula, Navya Shilpa and Kahonen, Mika and Khor, Chiea-Chuen and Koenig, Wolfgang and Kramer, Holly and Kraemer, Bernhard K. and Kuehnel, Brigitte and Lange, Leslie A. and Lehtimaki, Terho and Lieb, Wolfgang and Loos, Ruth J. F. and Lukas, Mary Ann and Lyytikainen, Leo-Pekka and Meisinger, Christa and Meitinger, Thomas and Melander, Olle and Milaneschi, Yuri and Mishra, Pashupati P. and Mononen, Nina and Mychaleckyj, Josyf C. and Nadkarni, Girish N. and Nauck, Matthias and Nikus, Kjell and Ning, Boting and Nolte, Ilja M. and O'Donoghue, Michelle L. and Orho-Melander, Marju and Pendergrass, Sarah A. and Penninx, Brenda W. J. H. and Preuss, Michael H. and Psaty, Bruce M. and Raffield, Laura M. and Raitakari, Olli T. and Rettig, Rainer and Rheinberger, Myriam and Rice, Kenneth M. and Rosenkranz, Alexander R. and Rossing, Peter and Rotter, Jerome and Sabanayagam, Charumathi and Schmidt, Helena and Schmidt, Reinhold and Schoettker, Ben and Schulz, Christina-Alexandra and Sedaghat, Sanaz and Shaffer, Christian M. and Strauch, Konstantin and Szymczak, Silke and Taylor, Kent D. and Tremblay, Johanne and Chaker, Layal and van der Harst, Pim and van der Most, Peter J. and Verweij, Niek and Voelker, Uwe and Waldenberger, Melanie and Wallentin, Lars and Waterworth, Dawn M. and White, Harvey D. and Wilson, James G. and Wong, Tien-Yin and Woodward, Mark and Yang, Qiong and Yasuda, Masayuki and Yerges-Armstrong, Laura M. and Zhang, Yan and Snieder, Harold and Wanner, Christoph and Boger, Carsten A. and Kottgen, Anna and Kronenberg, Florian and Pattaro, Cristian and Heid, Iris M.}, title = {Meta-analysis uncovers genome-wide significant variants for rapid kidney function decline}, series = {Kidney international : official journal of the International Society of Nephrology}, volume = {99}, journal = {Kidney international : official journal of the International Society of Nephrology}, number = {4}, publisher = {Elsevier}, address = {New York}, organization = {Lifelines Cohort Study
Regeneron Genetics Ctr}, issn = {0085-2538}, doi = {10.1016/j.kint.2020.09.030}, pages = {926 -- 939}, year = {2020}, abstract = {Rapid decline of glomerular filtration rate estimated from creatinine (eGFRcrea) is associated with severe clinical endpoints. In contrast to cross-sectionally assessed eGFRcrea, the genetic basis for rapid eGFRcrea decline is largely unknown. To help define this, we meta-analyzed 42 genome-wide association studies from the Chronic Kidney Diseases Genetics Consortium and United Kingdom Biobank to identify genetic loci for rapid eGFRcrea decline. Two definitions of eGFRcrea decline were used: a decline of 3 mL/min/1.73m²/year or more ("Rapid3"; encompassing 34,874 cases, 107,090 controls) and a decline of 25\% or more with eGFRcrea under 60 mL/min/1.73m² at follow-up among those with eGFRcrea of 60 mL/min/1.73m² or more at baseline ("CKDi25"; encompassing 19,901 cases, 175,244 controls). Seven independent variants were identified across six loci for Rapid3 and/or CKDi25, consisting of five variants at four loci with genome-wide significance (near UMOD-PDILT (two variants), PRKAG2, WDR72, and OR2S2) and two variants among 265 known eGFRcrea variants (near GATM and LARP4B). All of these loci were novel for Rapid3 and/or CKDi25, and our bioinformatic follow-up prioritized variants and genes underneath these loci. The OR2S2 locus is novel for any eGFRcrea trait and includes interesting candidate genes. For the five genome-wide significant lead variants, we found supporting effects for annual change in blood urea nitrogen or cystatin-based eGFR, but not for GATM or LARP4B. Individuals at high genetic risk (8-14 adverse alleles) compared to those at low genetic risk (0-5 adverse alleles) had a 1.20-fold increased risk of acute kidney injury (95\% confidence interval 1.08-1.33). Thus, our identified loci for rapid kidney function decline may help prioritize therapeutic targets and identify mechanisms and individuals at risk for sustained deterioration of kidney function.}, language = {en} } @misc{ZhouFischerTuncaetal.2020, author = {Zhou, Lin and Fischer, Eric and Tunca, Can and Brahms, Clemens Markus and Ersoy, Cem and Granacher, Urs and Arnrich, Bert}, title = {How We Found Our IMU}, series = {Postprints der Universit{\"a}t Potsdam : Reihe der Digital Engineering Fakult{\"a}t}, journal = {Postprints der Universit{\"a}t Potsdam : Reihe der Digital Engineering Fakult{\"a}t}, number = {2}, doi = {10.25932/publishup-48162}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-481628}, pages = {31}, year = {2020}, abstract = {Inertial measurement units (IMUs) are commonly used for localization or movement tracking in pervasive healthcare-related studies, and gait analysis is one of the most often studied topics using IMUs. The increasing variety of commercially available IMU devices offers convenience by combining the sensor modalities and simplifies the data collection procedures. However, selecting the most suitable IMU device for a certain use case is increasingly challenging. In this study, guidelines for IMU selection are proposed. In particular, seven IMUs were compared in terms of their specifications, data collection procedures, and raw data quality. Data collected from the IMUs were then analyzed by a gait analysis algorithm. The differences in the accuracy of the calculated gait parameters between the IMUs could be used to trace issues in the raw data, such as the acceleration range or sensor calibration. Based on our algorithm, we were able to identify the best-suited IMUs for our needs.
This study provides an overview of how to select IMUs based on the area of study, with concrete examples, and gives insights into the features of seven commercial IMUs using real data.}, language = {en} } @misc{AlbertOwolabiGebeletal.2020, author = {Albert, Justin Amadeus and Owolabi, Victor and Gebel, Arnd and Brahms, Clemens Markus and Granacher, Urs and Arnrich, Bert}, title = {Evaluation of the Pose Tracking Performance of the Azure Kinect and Kinect v2 for Gait Analysis in Comparison with a Gold Standard}, series = {Postprints der Universit{\"a}t Potsdam : Reihe der Digital Engineering Fakult{\"a}t}, journal = {Postprints der Universit{\"a}t Potsdam : Reihe der Digital Engineering Fakult{\"a}t}, number = {3}, doi = {10.25932/publishup-48413}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-484130}, pages = {24}, year = {2020}, abstract = {Gait analysis is an important tool for the early detection of neurological diseases and for the assessment of the risk of falling in elderly people. The availability of low-cost camera hardware on the market today and recent advances in Machine Learning enable a wide range of clinical and health-related applications, such as patient monitoring or exercise recognition at home. In this study, we evaluated the motion tracking performance of the latest generation of the Microsoft Kinect camera, Azure Kinect, compared to its predecessor Kinect v2 during treadmill walking, using a gold-standard Vicon multi-camera motion capture system and the 39-marker Plug-in Gait model. Five young and healthy subjects walked on a treadmill at three different velocities while data were recorded simultaneously with all three camera systems. An easy-to-administer camera calibration method developed here was used to spatially align the 3D skeleton data from both Kinect cameras and the Vicon system. With this calibration, the spatial agreement of joint positions between the two Kinect cameras and the reference system was evaluated. In addition, we compared the accuracy of certain spatio-temporal gait parameters, i.e., step length, step time, step width, and stride time, calculated from the Kinect data, against the gold-standard system (the sketch below illustrates how such parameters follow from gait events). Our results showed that the improved hardware and the motion tracking algorithm of the Azure Kinect camera led to a significantly higher accuracy of the spatial gait parameters than the predecessor Kinect v2, while no significant differences were found between the temporal parameters. Furthermore, we explain in detail how this experimental setup could be used to continuously monitor progress during gait rehabilitation in older people.}, language = {en} } @article{AlbertOwolabiGebeletal.2020, author = {Albert, Justin Amadeus and Owolabi, Victor and Gebel, Arnd and Brahms, Clemens Markus and Granacher, Urs and Arnrich, Bert}, title = {Evaluation of the Pose Tracking Performance of the Azure Kinect and Kinect v2 for Gait Analysis in Comparison with a Gold Standard}, series = {Sensors}, volume = {20}, journal = {Sensors}, number = {18}, publisher = {MDPI}, address = {Basel}, issn = {1424-8220}, doi = {10.3390/s20185104}, pages = {22}, year = {2020}, abstract = {Gait analysis is an important tool for the early detection of neurological diseases and for the assessment of the risk of falling in elderly people. The availability of low-cost camera hardware on the market today and recent advances in Machine Learning enable a wide range of clinical and health-related applications, such as patient monitoring or exercise recognition at home.
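A hedged sketch of how two of the spatio-temporal gait parameters named above follow from detected gait events: step time and step length from successive heel strikes of alternating feet, and stride time from same-foot strikes. The event times and positions are hypothetical, and the entry's actual event-detection pipeline is not reproduced.

```python
# Hedged sketch: derive step time, step length, and stride time from
# heel-strike events of alternating feet.
import numpy as np

heel_strike_t = np.array([0.00, 0.52, 1.05, 1.56, 2.09])  # seconds, alternating feet -- hypothetical
heel_pos_x = np.array([0.00, 0.61, 1.23, 1.85, 2.46])     # meters along walking direction -- hypothetical

step_time = np.diff(heel_strike_t)                    # time between successive heel strikes
step_length = np.diff(heel_pos_x)                     # distance between successive heel strikes
stride_time = heel_strike_t[2:] - heel_strike_t[:-2]  # interval between same-foot strikes

print(f"mean step time   {step_time.mean():.2f} s")
print(f"mean step length {step_length.mean():.2f} m")
print(f"mean stride time {stride_time.mean():.2f} s")
```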
In this study, we evaluated the motion tracking performance of the latest generation of the Microsoft Kinect camera, Azure Kinect, compared to its predecessor Kinect v2 during treadmill walking, using a gold-standard Vicon multi-camera motion capture system and the 39-marker Plug-in Gait model. Five young and healthy subjects walked on a treadmill at three different velocities while data were recorded simultaneously with all three camera systems. An easy-to-administer camera calibration method developed here was used to spatially align the 3D skeleton data from both Kinect cameras and the Vicon system. With this calibration, the spatial agreement of joint positions between the two Kinect cameras and the reference system was evaluated. In addition, we compared the accuracy of certain spatio-temporal gait parameters, i.e., step length, step time, step width, and stride time, calculated from the Kinect data, against the gold-standard system. Our results showed that the improved hardware and the motion tracking algorithm of the Azure Kinect camera led to a significantly higher accuracy of the spatial gait parameters than the predecessor Kinect v2, while no significant differences were found between the temporal parameters. Furthermore, we explain in detail how this experimental setup could be used to continuously monitor progress during gait rehabilitation in older people.}, language = {en} } @article{KonakWegnerArnrich2020, author = {Konak, Orhan and Wegner, Pit and Arnrich, Bert}, title = {IMU-Based Movement Trajectory Heatmaps for Human Activity Recognition}, series = {Sensors}, volume = {20}, journal = {Sensors}, number = {24}, publisher = {MDPI}, address = {Basel}, issn = {1424-8220}, doi = {10.3390/s20247179}, pages = {15}, year = {2020}, abstract = {Recent trends in ubiquitous computing have led to a proliferation of studies that focus on human activity recognition (HAR) utilizing inertial sensor data consisting of acceleration, orientation, and angular velocity. However, the performance of such approaches is limited by the amount of annotated training data, especially in fields where annotating data is highly time-consuming and requires specialized professionals, such as in healthcare. In image classification, this limitation has been mitigated by powerful oversampling techniques such as data augmentation. Using this technique, this work evaluates to what extent transforming inertial sensor data into movement trajectories and 2D heatmap images can be advantageous for HAR when data are scarce. A convolutional long short-term memory (ConvLSTM) network that incorporates spatiotemporal correlations was used to classify the heatmap images (a minimal model sketch follows below). Evaluation was carried out on Deep Inertial Poser (DIP), a known dataset composed of inertial sensor data. The results obtained suggest that for datasets with large numbers of subjects, using state-of-the-art methods remains the best alternative. However, a performance advantage was achieved for small datasets, which is usually the case in healthcare.
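A hedged sketch of a ConvLSTM classifier over sequences of 2D heatmap frames, the kind of architecture this entry describes; the input shape, layer sizes, and number of classes are illustrative assumptions, not the paper's exact network.

```python
# Hedged sketch: classify stacks of trajectory heatmaps with a ConvLSTM.
import tensorflow as tf

num_classes = 8  # hypothetical number of activity classes
model = tf.keras.Sequential([
    tf.keras.layers.ConvLSTM2D(32, kernel_size=3, activation="tanh",
                               input_shape=(10, 64, 64, 1)),  # 10 frames of 64x64x1 heatmaps
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(num_classes, activation="softmax"),
])
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])
model.summary()
```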
Moreover, movement trajectories provide a visual representation of human activities, which can help researchers to better interpret and analyze motion patterns.}, language = {en} } @article{ZhouFischerTuncaetal.2020, author = {Zhou, Lin and Fischer, Eric and Tunca, Can and Brahms, Clemens Markus and Ersoy, Cem and Granacher, Urs and Arnrich, Bert}, title = {How We Found Our IMU}, series = {Sensors}, volume = {20}, journal = {Sensors}, number = {15}, publisher = {MDPI}, address = {Basel}, issn = {1424-8220}, doi = {10.3390/s20154090}, pages = {29}, year = {2020}, abstract = {Inertial measurement units (IMUs) are commonly used for localization or movement tracking in pervasive healthcare-related studies, and gait analysis is one of the most often studied topics using IMUs. The increasing variety of commercially available IMU devices offers convenience by combining the sensor modalities and simplifies the data collection procedures. However, selecting the most suitable IMU device for a certain use case is increasingly challenging. In this study, guidelines for IMU selection are proposed. In particular, seven IMUs were compared in terms of their specifications, data collection procedures, and raw data quality. Data collected from the IMUs were then analyzed by a gait analysis algorithm. The differences in the accuracy of the calculated gait parameters between the IMUs could be used to trace issues in the raw data, such as the acceleration range or sensor calibration. Based on our algorithm, we were able to identify the best-suited IMUs for our needs. This study provides an overview of how to select IMUs based on the area of study, with concrete examples, and gives insights into the features of seven commercial IMUs using real data.}, language = {en} } @article{LewkowiczWohlbrandtBoettinger2020, author = {Lewkowicz, Daniel and Wohlbrandt, Attila and B{\"o}ttinger, Erwin}, title = {Economic impact of clinical decision support interventions based on electronic health records}, series = {BMC Health Services Research}, volume = {20}, journal = {BMC Health Services Research}, publisher = {BioMed Central}, address = {London}, issn = {1472-6963}, doi = {10.1186/s12913-020-05688-3}, pages = {12}, year = {2020}, abstract = {Background: Unnecessary healthcare utilization, non-adherence to current clinical guidelines, and insufficient personalized care are perpetual challenges and remain potential major cost drivers for healthcare systems around the world. Implementing decision support systems in clinical care promises to improve the quality of care and thereby yield substantial effects on reducing healthcare expenditure. In this article, we evaluate the economic impact of clinical decision support (CDS) interventions based on electronic health records (EHR). Methods: We searched for studies published after 2014 using the MEDLINE, CENTRAL, WEB OF SCIENCE, EBSCO, and TUFTS CEA registry databases that encompass an economic evaluation or consider cost outcome measures of EHR-based CDS interventions. Thereupon, we identified best-practice application areas and categorized the investigated interventions according to an existing taxonomy of front-end CDS tools. Results and discussion: Twenty-seven studies are investigated in this review.
Of those, twenty-two studies indicate a reduction of healthcare expenditure after implementing an EHR-based CDS system, especially in prevalent application areas such as unnecessary laboratory testing, duplicate order entry, efficient transfusion practice, and reduction of antibiotic prescriptions. On the other hand, order facilitators and undiscovered malfunctions turned out to be threats and could lead to new cost drivers in healthcare. While the high upfront and maintenance costs of CDS systems are a worldwide implementation barrier, most studies do not consider implementation costs. Finally, four included economic evaluation studies report mixed monetary outcome results and thus highlight the importance of further high-quality economic evaluations for these CDS systems. Conclusion: Current research studies lack consideration of comparative cost-outcome metrics as well as detailed cost components in their analyses. Nonetheless, the positive economic impact of EHR-based CDS interventions is highly promising, especially with regard to reducing waste in healthcare.}, language = {en} }