@book{SchreiberKrahnIngallsetal.2016, author = {Schreiber, Robin and Krahn, Robert and Ingalls, Daniel H. H. and Hirschfeld, Robert}, title = {Transmorphic}, number = {110}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-387-9}, issn = {1613-5652}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-98300}, pages = {100}, year = {2016}, abstract = {Defining Graphical User Interfaces (GUIs) through functional abstractions can reduce the complexity that arises from mutable abstractions. Recent examples, such as Facebook's React GUI framework, have shown how modelling the view as a functional projection from the application state to a visual representation can reduce the number of interacting objects and thus help to improve the reliability of the system. This, however, comes at the price of a more rigid, functional framework where programmers are forced to express visual entities with functional abstractions, detached from the way one intuitively thinks about the physical world. In contrast to that, the GUI framework Morphic allows interactions in the graphical domain, such as grabbing, dragging or resizing of elements, to evolve an application at runtime, providing liveness and directness in the development workflow. Modelling each visual entity through mutable abstractions, however, makes it difficult to ensure correctness when GUIs start to grow more complex. Furthermore, by evolving morphs at runtime through direct manipulation we diverge more and more from the symbolic description that corresponds to the morph. Given that both of these approaches have their merits and problems, is there a way to combine them in a meaningful way that preserves their respective benefits? As a solution to this problem, we propose to lift Morphic's concept of direct manipulation from the mutation of state to the transformation of source code. In particular, we will explore the design, implementation and integration of a bidirectional mapping between the graphical representation and a functional and declarative symbolic description of a graphical user interface within a self-hosted development environment. We will present Transmorphic, a functional take on the Morphic GUI framework, where the visual and structural properties of morphs are defined in a purely functional, declarative fashion. In Transmorphic, the developer is able to assemble different morphs at runtime through direct manipulation, which is automatically translated into changes in the code of the application. In this way, the comprehensiveness and predictability of direct manipulation can be used in the context of a purely functional GUI, while the effects of the manipulation are reflected in a medium that is always within reach for the programmer and can even be used to incorporate the source transformations into the source files of the application.}, language = {en} } @phdthesis{Scholz2006, author = {Scholz, Matthias}, title = {Approaches to analyse and interpret biological profile data}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-7839}, school = {Universit{\"a}t Potsdam}, year = {2006}, abstract = {Advances in biotechnologies rapidly increase the number of molecules of a cell which can be observed simultaneously. This includes expression levels of thousands or tens of thousands of genes as well as concentration levels of metabolites or proteins.
Such profile data, observed at different times or at different experimental conditions (e.g., heat or dry stress), show how the biological experiment is reflected on the molecular level. This information is helpful to understand the molecular behaviour and to identify molecules or combinations of molecules that characterise specific biological conditions (e.g., disease). This work shows the potential of component extraction algorithms to identify the major factors which influenced the observed data. These can be the expected experimental factors such as time or temperature as well as unexpected factors such as technical artefacts or even unknown biological behaviour. Extracting components means reducing the very high-dimensional data to a small set of new variables termed components. Each component is a combination of all original variables. The classical approach for that purpose is principal component analysis (PCA). It is shown that, in contrast to PCA, which maximises the variance only, modern approaches such as independent component analysis (ICA) are more suitable for analysing molecular data. The condition of independence between components of ICA fits more naturally our assumption of individual (independent) factors which influence the data. This higher potential of ICA is demonstrated by a crossing experiment of the model plant Arabidopsis thaliana (thale cress). The experimental factors could be well identified and, in addition, ICA could even detect a technical artefact. However, in continuous observations such as time experiments, the data show, in general, a nonlinear distribution. To analyse such nonlinear data, a nonlinear extension of PCA is used. This nonlinear PCA (NLPCA) is based on a neural network algorithm. The algorithm is adapted to be applicable to incomplete molecular data sets. Thus, it also provides the ability to estimate the missing data. The potential of nonlinear PCA to identify nonlinear factors is demonstrated by a cold stress experiment of Arabidopsis thaliana. The results of component analysis can be used to build a molecular network model. Since it includes functional dependencies, it is termed a functional network. Applied to the cold stress data, it is shown that functional networks are appropriate to visualise biological processes and thereby reveal molecular dynamics.}, subject = {Bioinformatik}, language = {en} } @book{SchneiderMaximovaGiese2022, author = {Schneider, Sven and Maximova, Maria and Giese, Holger}, title = {Invariant Analysis for Multi-Agent Graph Transformation Systems using k-Induction}, number = {143}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-531-6}, issn = {1613-5652}, doi = {10.25932/publishup-54585}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-545851}, pages = {37}, year = {2022}, abstract = {The analysis of behavioral models such as Graph Transformation Systems (GTSs) is of central importance in model-driven engineering. However, GTSs often result in intractably large or even infinite state spaces and may be equipped with multiple or even infinitely many start graphs. To mitigate these problems, static analysis techniques based on finite symbolic representations of sets of states or paths thereof have been devised. We focus on the technique of k-induction for establishing invariants specified using graph conditions.
To this end, k-induction generates symbolic paths backwards from a symbolic state representing a violation of a candidate invariant to gather information on how that violation could have been reached, possibly obtaining contradictions to assumed invariants. However, GTSs where multiple agents regularly perform actions independently of each other cannot currently be analyzed using this technique, as the independence among backward steps may prevent the gathering of relevant knowledge altogether. In this paper, we extend k-induction to GTSs with multiple agents, thereby supporting a wide range of additional GTSs. As a running example, we consider an unbounded number of shuttles driving on a large-scale track topology, which adjust their velocity to speed limits to avoid derailing. As a central contribution, we develop pruning techniques based on causality and independence among backward steps and verify that k-induction remains sound under this adaptation and terminates in cases where it did not terminate before.}, language = {en} } @book{SchneiderMaximovaGiese2022a, author = {Schneider, Sven and Maximova, Maria and Giese, Holger}, title = {Probabilistic metric temporal graph logic}, number = {146}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-532-3}, issn = {1613-5652}, doi = {10.25932/publishup-54586}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-545867}, pages = {34}, year = {2022}, abstract = {Cyber-physical systems often encompass complex concurrent behavior with timing constraints and probabilistic failures on demand. The analysis of whether such systems with probabilistic timed behavior adhere to a given specification is essential. When the states of the system can be represented by graphs, the rule-based formalism of Probabilistic Timed Graph Transformation Systems (PTGTSs) can be used to suitably capture structure dynamics as well as probabilistic and timed behavior of the system. Model checking support for PTGTSs w.r.t. properties specified using Probabilistic Timed Computation Tree Logic (PTCTL) has already been presented. Moreover, for timed graph-based runtime monitoring, Metric Temporal Graph Logic (MTGL) has been developed for stating metric temporal properties on identified subgraphs and their structural changes over time. In this paper, we (a) extend MTGL to the Probabilistic Metric Temporal Graph Logic (PMTGL) by allowing for the specification of probabilistic properties, (b) adapt our MTGL satisfaction checking approach to PTGTSs, and (c) combine the approaches for PTCTL model checking and MTGL satisfaction checking to obtain a Bounded Model Checking (BMC) approach for PMTGL. In our evaluation, we apply an implementation of our BMC approach in AutoGraph to a running example.}, language = {en} } @book{SchneiderMaximovaGiese2021, author = {Schneider, Sven and Maximova, Maria and Giese, Holger}, title = {Probabilistic metric temporal graph logic}, number = {140}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-517-0}, issn = {1613-5652}, doi = {10.25932/publishup-51506}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-515066}, pages = {40}, year = {2021}, abstract = {Cyber-physical systems often encompass complex concurrent behavior with timing constraints and probabilistic failures on demand.
The analysis of whether such systems with probabilistic timed behavior adhere to a given specification is essential. When the states of the system can be represented by graphs, the rule-based formalism of Probabilistic Timed Graph Transformation Systems (PTGTSs) can be used to suitably capture structure dynamics as well as probabilistic and timed behavior of the system. Model checking support for PTGTSs w.r.t. properties specified using Probabilistic Timed Computation Tree Logic (PTCTL) has already been presented. Moreover, for timed graph-based runtime monitoring, Metric Temporal Graph Logic (MTGL) has been developed for stating metric temporal properties on identified subgraphs and their structural changes over time. In this paper, we (a) extend MTGL to the Probabilistic Metric Temporal Graph Logic (PMTGL) by allowing for the specification of probabilistic properties, (b) adapt our MTGL satisfaction checking approach to PTGTSs, and (c) combine the approaches for PTCTL model checking and MTGL satisfaction checking to obtain a Bounded Model Checking (BMC) approach for PMTGL. In our evaluation, we apply an implementation of our BMC approach in AutoGraph to a running example.}, language = {en} } @article{SchneiderLambersOrejas2021, author = {Schneider, Sven and Lambers, Leen and Orejas, Fernando}, title = {A logic-based incremental approach to graph repair featuring delta preservation}, series = {International journal on software tools for technology transfer : STTT}, volume = {23}, journal = {International journal on software tools for technology transfer : STTT}, number = {3}, publisher = {Springer}, address = {Berlin ; Heidelberg}, issn = {1433-2779}, doi = {10.1007/s10009-020-00584-x}, pages = {369 -- 410}, year = {2021}, abstract = {We introduce a logic-based incremental approach to graph repair, generating a sound and complete (upon termination) overview of least-changing graph repairs from which a user may select a graph repair based on non-formalized further requirements. This incremental approach features delta preservation, as it allows restricting the generation of graph repairs to delta-preserving graph repairs, which do not revert the additions and deletions of the most recent consistency-violating graph update. We specify consistency of graphs using the logic of nested graph conditions, which is equivalent to first-order logic on graphs. Technically, the incremental approach encodes if and how the graph under repair satisfies a graph condition using the novel data structure of satisfaction trees, which are adapted incrementally according to the graph updates applied. In addition to the incremental approach, we also present two state-based graph repair algorithms, which restore consistency of a graph independently of the most recent graph update and which generate additional graph repairs using a global perspective on the graph under repair.
We evaluate the developed algorithms using our prototypical implementation in the tool AutoGraph and illustrate our incremental approach using a case study from the graph database domain.}, language = {en} } @book{SchneiderLambersOrejas2017, author = {Schneider, Sven and Lambers, Leen and Orejas, Fernando}, title = {Symbolic model generation for graph properties}, number = {115}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-396-1}, issn = {1613-5652}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-103171}, pages = {48}, year = {2017}, abstract = {Graphs are ubiquitous in Computer Science. For this reason, in many areas, it is very important to have the means to express and reason about graph properties. In particular, we want to be able to check automatically if a given graph property is satisfiable. Actually, in most application scenarios it is desirable to be able to explore graphs satisfying the graph property if they exist or even to get a complete and compact overview of the graphs satisfying the graph property. We show that the tableau-based reasoning method for graph properties as introduced by Lambers and Orejas paves the way for a symbolic model generation algorithm for graph properties. Graph properties are formulated in a dedicated logic making use of graphs and graph morphisms, which is equivalent to first-order logic on graphs as introduced by Courcelle. Our parallelizable algorithm gradually generates a finite set of so-called symbolic models, where each symbolic model describes a set of finite graphs (i.e., finite models) satisfying the graph property. The set of symbolic models jointly describes all finite models for the graph property (complete) and does not describe any finite graph violating the graph property (sound). Moreover, no symbolic model is already covered by another one (compact). Finally, the algorithm is able to immediately generate a minimal finite model from each symbolic model and allows for an exploration of further finite models. The algorithm is implemented in the new tool AutoGraph.}, language = {en} } @book{SchneiderLambersOrejas2019, author = {Schneider, Sven and Lambers, Leen and Orejas, Fernando}, title = {A logic-based incremental approach to graph repair}, number = {126}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-462-3}, issn = {1613-5652}, doi = {10.25932/publishup-42751}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-427517}, pages = {34}, year = {2019}, abstract = {Graph repair, restoring consistency of a graph, plays a prominent role in several areas of computer science and beyond: For example, in model-driven engineering, the abstract syntax of models is usually encoded using graphs. Flexible edit operations temporarily create inconsistent graphs not representing a valid model, thus requiring graph repair. Similarly, in graph databases—managing the storage and manipulation of graph data—updates may cause a given database to no longer satisfy some integrity constraints, also requiring graph repair. We present a logic-based incremental approach to graph repair, generating a sound and complete (upon termination) overview of least-changing repairs. In our context, we formalize consistency by so-called graph conditions being equivalent to first-order logic on graphs.
We present two kinds of repair algorithms: State-based repair restores consistency independently of the graph update history, whereas delta-based (or incremental) repair takes this history explicitly into account. Technically, our algorithms rely on an existing model generation algorithm for graph conditions implemented in AutoGraph. Moreover, the delta-based approach uses the new concept of satisfaction (ST) trees for encoding if and how a graph satisfies a graph condition. We then demonstrate how to manipulate these STs incrementally with respect to a graph update.}, language = {en} } @article{SchneiderWenigPapenbrock2021, author = {Schneider, Johannes and Wenig, Phillip and Papenbrock, Thorsten}, title = {Distributed detection of sequential anomalies in univariate time series}, series = {The VLDB journal : the international journal on very large data bases}, volume = {30}, journal = {The VLDB journal : the international journal on very large data bases}, number = {4}, publisher = {Springer}, address = {Berlin}, issn = {1066-8888}, doi = {10.1007/s00778-021-00657-6}, pages = {579 -- 602}, year = {2021}, abstract = {The automated detection of sequential anomalies in time series is an essential task for many applications, such as the monitoring of technical systems, fraud detection in high-frequency trading, or the early detection of disease symptoms. All these applications require the detection to find all sequential anomalies as fast as possible on potentially very large time series. In other words, the detection needs to be effective, efficient and scalable w.r.t. the input size. Series2Graph is an effective solution based on graph embeddings; it is robust against re-occurring anomalies, can discover sequential anomalies of arbitrary length, and works without training data. Yet, Series2Graph is not scalable due to its single-threaded approach; it cannot, in particular, process arbitrarily large sequences due to the memory constraints of a single machine. In this paper, we propose our distributed anomaly detection system, DADS for short, which is an efficient and scalable adaptation of Series2Graph. Based on the actor programming model, DADS distributes the input time sequence, intermediate state and the computation to all processors of a cluster in a way that minimizes communication costs and synchronization barriers. Our evaluation shows that DADS is orders of magnitude faster than Series2Graph, scales almost linearly with the number of processors in the cluster and can process much larger input sequences due to its scale-out property.}, language = {en} } @phdthesis{Schneider2019, author = {Schneider, Jan Niklas}, title = {Computational approaches for emotion research}, doi = {10.25932/publishup-45927}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-459275}, school = {Universit{\"a}t Potsdam}, pages = {xv, 145}, year = {2019}, abstract = {Emotions are a central element of human experience and play an important role in decision-making. This dissertation identifies three methodological problems of current emotion research and shows how they can be solved by means of computational methods. This approach is demonstrated in three research projects, which describe the development of such methods as well as their application to concrete research questions. The first project describes a paradigm that makes it possible to measure the subjective and objective difficulty of emotion perception.
Dar{\"u}ber hinaus erm{\"o}glicht es die Verwendung einer beliebigen Anzahl von Emotionskategorien im Vergleich zu den {\"u}blichen sechs Kategorien der Basisemotionen. Die Ergebnisse deuten auf eine Zunahme der Schwierigkeiten bei der Wahrnehmung von Emotionen mit zunehmendem Alter der Darsteller hin und liefern Hinweise darauf, dass junge Erwachsene, {\"a}ltere Menschen und M{\"a}nner ihre Schwierigkeit bei der Wahrnehmung von Emotionen untersch{\"a}tzen. Weitere Analysen zeigten eine geringe Relevanz personenbezogener Variablen und deuteten darauf hin, dass die Schwierigkeit der Emotionswahrnehmung vornehmlich durch die Auspr{\"a}gung der Wertigkeit des Ausdrucks bestimmt wird. Das zweite Projekt zeigt am Beispiel von Arousal, einem etablierten, aber vagen Konstrukt der Emotionsforschung, wie Face-Tracking-Daten dazu genutzt werden k{\"o}nnen solche Konstrukte zu sch{\"a}rfen. Es beschreibt, wie aus Face-Tracking-Daten Maße f{\"u}r die Entfernung, Geschwindigkeit und Beschleunigung von Gesichtsausdr{\"u}cken berechnet werden k{\"o}nnen. Das Projekt untersuchte wie diesen Maße mit der Arousal-Wahrnehmung in Menschen mit und ohne Autismus zusammenh{\"a}ngen. Der Abstand zum Neutralgesicht war pr{\"a}diktiv f{\"u}r die Arousal-Bewertungen in beiden Gruppen. Die Ergebnisse deuten auf eine qualitativ {\"a}hnliche Wahrnehmung von Arousal f{\"u}r Menschen mit und ohne Autismus hin. Im dritten Projekt stellen wir die Partial-Least-Squares-Analyse als allgemeine Methode vor, um eine optimale Repr{\"a}sentation zur Verkn{\"u}pfung zweier hochdimensionale Datens{\"a}tze zu finden. Das Projekt demonstriert die Anwendbarkeit dieser Methode in der Emotionsforschung anhand der Frage nach Unterschieden in der Emotionswahrnehmung zwischen M{\"a}nnern und Frauen. Wir konnten zeigen, dass die emotionale Wahrnehmung von Frauen systematisch mehr Varianz der Gesichtsausdr{\"u}cke erfasst und dass signifikante Unterschiede in der Art und Weise bestehen, wie Frauen und M{\"a}nner einige Gesichtsausdr{\"u}cke wahrnehmen. Diese konnten wir als dynamische Gesichtsausdr{\"u}cke visualisieren. Um die Anwendung der entwickelten Methode f{\"u}r die Forschungsgemeinschaft zu erleichtern, wurde ein Software-Paket f{\"u}r die Statistikumgebung R geschrieben. Zudem wurde eine Website entwickelt (thisemotiondoesnotexist.com), die es Besuchern erlaubt, ein Partial-Least-Squares-Modell von Emotionsbewertungen und Face-Tracking-Daten interaktiv zu erkunden, um die entwickelte Methode zu verbreiten und ihren Nutzen f{\"u}r die Emotionsforschung zu illustrieren.}, language = {en} } @book{SchmiedgenRhinowKoeppenetal.2015, author = {Schmiedgen, Jan and Rhinow, Holger and K{\"o}ppen, Eva and Meinel, Christoph}, title = {Parts without a whole?}, number = {97}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-334-3}, issn = {1613-5652}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-79969}, publisher = {Universit{\"a}t Potsdam}, pages = {143}, year = {2015}, abstract = {This explorative study gives a descriptive overview of what organizations do and experience when they say they practice design thinking. It looks at how the concept has been appropriated in organizations and also describes patterns of design thinking adoption. The authors use a mixed-method research design fed by two sources: questionnaire data and semi-structured personal expert interviews. 
The study proceeds in six parts: (1) design thinking's entry points into organizations; (2) understandings of the descriptor; (3) its fields of application and organizational localization; (4) its perceived impact; (5) reasons for its discontinuation or failure; and (6) attempts to measure its success. In conclusion, the report challenges managers to be more conscious of their current design thinking practice. The authors suggest a co-evolution of the concept's introduction with innovation capability building and the respective changes in leadership approaches. It is argued that this might help in unfolding design thinking's hidden potentials as well as preventing unintended side-effects such as discontented teams or the dwindling authority of managers.}, language = {en} } @article{SchmidlPapenbrock2022, author = {Schmidl, Sebastian and Papenbrock, Thorsten}, title = {Efficient distributed discovery of bidirectional order dependencies}, series = {The VLDB journal}, volume = {31}, journal = {The VLDB journal}, number = {1}, publisher = {Springer}, address = {Berlin ; Heidelberg ; New York}, issn = {1066-8888}, doi = {10.1007/s00778-021-00683-4}, pages = {49 -- 74}, year = {2022}, abstract = {Bidirectional order dependencies (bODs) capture order relationships between lists of attributes in a relational table. They can express that, for example, sorting books by publication date in ascending order also sorts them by age in descending order. The knowledge about order relationships is useful for many data management tasks, such as query optimization, data cleaning, or consistency checking. Because the bODs of a specific dataset are usually not explicitly given, they need to be discovered. The discovery of all minimal bODs (in set-based canonical form) is a task with exponential complexity in the number of attributes, though, which is why existing bOD discovery algorithms cannot process datasets of practically relevant size in a reasonable time. In this paper, we propose the distributed bOD discovery algorithm DISTOD, whose execution time scales with the available hardware. DISTOD is a scalable, robust, and elastic bOD discovery approach that combines efficient pruning techniques for bOD candidates in set-based canonical form with a novel, reactive, and distributed search strategy. Our evaluation on various datasets shows that DISTOD outperforms both single-threaded and distributed state-of-the-art bOD discovery algorithms by up to orders of magnitude; it can, in particular, process much larger datasets.}, language = {en} } @phdthesis{Schindler2016, author = {Schindler, Sven}, title = {Honeypot Architectures for IPv6 Networks}, school = {Universit{\"a}t Potsdam}, pages = {164}, year = {2016}, language = {en} } @article{SchindlerMoldenhawerStangeetal.2021, author = {Schindler, Daniel and Moldenhawer, Ted and Stange, Maike and Lepro, Valentino and Beta, Carsten and Holschneider, Matthias and Huisinga, Wilhelm}, title = {Analysis of protrusion dynamics in amoeboid cell motility by means of regularized contour flows}, series = {PLoS Computational Biology : a new community journal}, volume = {17}, journal = {PLoS Computational Biology : a new community journal}, number = {8}, publisher = {PLoS}, address = {San Francisco}, issn = {1553-734X}, doi = {10.1371/journal.pcbi.1009268}, pages = {33}, year = {2021}, abstract = {Amoeboid cell motility is essential for a wide range of biological processes including wound healing, embryonic morphogenesis, and cancer metastasis.
It relies on complex dynamical patterns of cell shape changes that pose long-standing challenges to mathematical modeling and raise a need for automated and reproducible approaches to extract quantitative morphological features from image sequences. Here, we introduce a theoretical framework and a computational method for obtaining smooth representations of the spatiotemporal contour dynamics from stacks of segmented microscopy images. Based on Gaussian process regression, we propose a one-parameter family of regularized contour flows that allows us to continuously track reference points (virtual markers) between successive cell contours. We use this approach to define a coordinate system on the moving cell boundary and to represent different local geometric quantities in this frame of reference. In particular, we introduce the local marker dispersion as a measure to identify localized membrane expansions and provide a fully automated way to extract the properties of such expansions, including their area and growth time. The methods are available as an open-source software package called AmoePy, a Python-based toolbox for analyzing amoeboid cell motility (based on time-lapse microscopy data), including a graphical user interface and detailed documentation. Due to the mathematical rigor of our framework, we envision it to be of use for the development of novel cell motility models. We mainly use experimental data of the social amoeba Dictyostelium discoideum to illustrate and validate our approach.
Author summary: Amoeboid motion is a crawling-like cell migration that plays a key role in multiple biological processes such as wound healing and cancer metastasis. This type of cell motility results from expanding and simultaneously contracting parts of the cell membrane. From fluorescence images, we obtain a sequence of points, representing the cell membrane, for each time step. By using regression analysis on these sequences, we derive smooth representations, so-called contours, of the membrane. Since the number of measurements is discrete and often limited, the question arises of how to link consecutive contours with each other. In this work, we present a novel mathematical framework in which these links are described by regularized flows allowing a certain degree of concentration or stretching of neighboring reference points on the same contour. This stretching rate, the so-called local dispersion, is used to identify expansions and contractions of the cell membrane, providing a fully automated way of extracting properties of these cell shape changes. We applied our methods to time-lapse microscopy data of the social amoeba Dictyostelium discoideum.}, language = {en} } @article{Schiller2015, author = {Schiller, Thomas}, title = {Teaching Information Security (as Part of Key Competencies)}, series = {KEYCIT 2014 - Key Competencies in Informatics and ICT}, journal = {KEYCIT 2014 - Key Competencies in Informatics and ICT}, number = {7}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, issn = {1868-0844}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-82960}, pages = {401 -- 404}, year = {2015}, abstract = {The poster and abstract describe the importance of teaching information security in school. After a short description of information security and its important aspects, I will show how information security fits into different guidelines and models for computer science education and that it is therefore one of the key competencies. Afterwards, I will give a rough insight into the teaching of information security in Austria.}, language = {en} } @book{ScherbaumMzhavanadzeArometal.2020, author = {Scherbaum, Frank and Mzhavanadze, Nana and Arom, Simha and Rosenzweig, Sebastian and M{\"u}ller, Meinard}, title = {Tonal Organization of the Erkomaishvili Dataset: Pitches, Scales, Melodies and Harmonies}, series = {Computational Analysis Of Traditional Georgian Vocal Music}, journal = {Computational Analysis Of Traditional Georgian Vocal Music}, number = {1}, editor = {Scherbaum, Frank}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, issn = {2702-2641}, doi = {10.25932/publishup-47614}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-476141}, pages = {64}, year = {2020}, abstract = {In this study we examine the tonal organization of a series of recordings of liturgical chants, sung in 1966 by the Georgian master singer Artem Erkomaishvili. This dataset is the oldest corpus of Georgian chants from which the time-synchronous F0-trajectories for all three voices have been reliably determined (M{\"u}ller et al. 2017). It is therefore of outstanding importance for the understanding of the tuning principles of traditional Georgian vocal music. The aim of the present study is to use various computational methods to analyze what these recordings can contribute to the ongoing scientific dispute about traditional Georgian tuning systems.
The starting point for the present analysis is the re-release of the original audio data together with estimated fundamental frequency (F0) trajectories for each of the three voices, beat annotations, and digital scores (Rosenzweig et al. 2020). We present synoptic models for the pitch and the harmonic interval distributions, which are the first such models for which the complete Erkomaishvili dataset was used. We show that these distributions can be expressed very compactly as Gaussian mixture models, anchored on discrete sets of pitch or interval values for the pitch and interval distributions, respectively. As part of our study we demonstrate that these pitch values, which we refer to as scale pitches, and which are determined as the mean values of the Gaussian mixture elements, define the scale degrees of the melodic sound scales which build the skeleton of Artem Erkomaishvili's intonation. The observation of consistent pitch bending of notes in melodic phrases, which appear in identical form in a group of chants, as well as the observation of harmonically driven intonation adjustments, which are clearly documented for all pure harmonic intervals, demonstrates that Artem Erkomaishvili intentionally deviates from the scale pitch skeleton quite freely. As a central result of our study, we prove that this melodic freedom is always constrained by the attracting influence of the scale pitches. Deviations of the F0-values of individual note events from the scale pitches at one instant of time are compensated for in the subsequent melodic steps. This suggests a deviation-compensation mechanism at the core of Artem Erkomaishvili's melody generation, which clearly honors the scales but still allows for a large degree of melodic flexibility. This model, which summarizes all partial aspects of our analysis, is consistent with the melodic scale models derived from the observed pitch distributions, as well as with the melodic and harmonic interval distributions. In addition to the tangible results of our work, we believe that it has general implications for the determination of tuning models from audio data, in particular for non-tempered music.}, language = {en} } @phdthesis{Scheffler2013, author = {Scheffler, Thomas}, title = {Privacy enforcement with data owner-defined policies}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-67939}, school = {Universit{\"a}t Potsdam}, year = {2013}, abstract = {This thesis proposes a privacy protection framework for the controlled distribution and use of personal private data. The framework is based on the idea that privacy policies can be set directly by the data owner and can be automatically enforced against the data user. Data privacy continues to be a very important topic, as our dependency on electronic communication maintains its current growth, and private data is shared between multiple devices, users and locations. The growing amount and the ubiquitous availability of personal private data increase the likelihood of data misuse. Early privacy protection techniques, such as anonymous email and payment systems, have focused on data avoidance and the anonymous use of services. They did not take into account that data sharing cannot be avoided when people participate in electronic communication scenarios that involve social interactions. This leads to a situation where data is shared widely and uncontrollably and in most cases the data owner has no control over further distribution and use of personal private data.
Previous efforts to integrate privacy awareness into data processing workflows have focused on the extension of existing access control frameworks with privacy-aware functions or have analysed specific individual problems such as the expressiveness of policy languages. So far, very few implementations of integrated privacy protection mechanisms exist that can be studied to prove their effectiveness for privacy protection. Second-level issues that stem from the practical application of the implemented mechanisms, such as usability, life-time data management and changes in trustworthiness, have received very little attention so far, mainly because they require actual implementations to be studied. Most existing privacy protection schemes silently assume that it is the privilege of the data user to define the contract under which personal private data is released. Such an approach simplifies policy management and policy enforcement for the data user, but leaves the data owner with a binary decision to submit or withhold his or her personal data based on the provided policy. We wanted to empower the data owner to express his or her privacy preferences through privacy policies that follow the so-called Owner-Retained Access Control (ORAC) model. ORAC has been proposed by McCollum et al. as an alternate access control mechanism that leaves the authority over access decisions with the originator of the data. The data owner is given control over the release policy for his or her personal data, and he or she can set permissions or restrictions according to individually perceived trust values. Such a policy needs to be expressed in a coherent way and must allow the deterministic policy evaluation by different entities. The privacy policy also needs to be communicated from the data owner to the data user, so that it can be enforced. Data and policy are stored together as a Protected Data Object that follows the Sticky Policy paradigm as defined by Mont et al. and others. We developed a unique policy combination approach that takes usability aspects for the creation and maintenance of policies into consideration. Our privacy policy consists of three parts: A Default Policy provides basic privacy protection if no specific rules have been entered by the data owner. An Owner Policy part allows the customisation of the default policy by the data owner. And a so-called Safety Policy guarantees that the data owner cannot specify disadvantageous policies, which, for example, exclude him or her from further access to the private data. The combined evaluation of these three policy parts yields the necessary access decision. The automatic enforcement of privacy policies in our protection framework is supported by a reference monitor implementation. We started our work with the development of a client-side protection mechanism that allows the enforcement of data-use restrictions after private data has been released to the data user. The client-side enforcement component for data-use policies is based on a modified Java Security Framework. Privacy policies are translated into corresponding Java permissions that can be automatically enforced by the Java Security Manager. When we later extended our work to implement server-side protection mechanisms, we found several drawbacks for the privacy enforcement through the Java Security Framework.
We solved this problem by extending our reference monitor design to use Aspect-Oriented Programming (AOP) and the Java Reflection API to intercept data accesses in existing applications and provide a way to enforce data owner-defined privacy policies for business applications.}, language = {en} } @article{Scheele2014, author = {Scheele, Lasse}, title = {Location analysis for placing artificial reefs}, series = {Process design for natural scientists: an agile model-driven approach}, journal = {Process design for natural scientists: an agile model-driven approach}, number = {500}, publisher = {Springer}, address = {Berlin}, isbn = {978-3-662-45005-5}, issn = {1865-0929}, pages = {216 -- 228}, year = {2014}, abstract = {Location analyses are among the most common tasks while working with spatial data and geographic information systems. Automating the most frequently used procedures is therefore an important aspect of improving their usability. In this context, this project aims to design and implement a workflow, providing some basic tools for a location analysis. For the implementation with jABC, the workflow was applied to the problem of finding a suitable location for placing an artificial reef. For this analysis, three parameters (bathymetry, slope and grain size of the ground material) were taken into account, processed, and visualized with The Generic Mapping Tools (GMT), which were integrated into the workflow as jETI-SIBs. The implemented workflow thereby showed that the approach of combining jABC with GMT resulted in a user-centric yet user-friendly tool with high-quality cartographic outputs.}, language = {en} } @article{SchaubWoltran2018, author = {Schaub, Torsten H. and Woltran, Stefan}, title = {Answer set programming unleashed!}, series = {K{\"u}nstliche Intelligenz}, volume = {32}, journal = {K{\"u}nstliche Intelligenz}, number = {2-3}, publisher = {Springer}, address = {Heidelberg}, issn = {0933-1875}, doi = {10.1007/s13218-018-0550-z}, pages = {105 -- 108}, year = {2018}, abstract = {Answer Set Programming enjoys increasing popularity for problem solving in various domains. While its modeling language allows us to express many complex problems in an easy way, its solving technology enables their effective resolution. In what follows, we detail some of the key factors of its success. Answer Set Programming [ASP; Brewka et al. Commun ACM 54(12):92-103 (2011)] is seeing a rapid proliferation in academia and industry due to its easy and flexible way to model and solve knowledge-intense combinatorial (optimization) problems. To this end, ASP offers a high-level modeling language paired with high-performance solving technology. As a result, ASP systems provide out-of-the-box, general-purpose search engines that allow for enumerating (optimal) solutions. They are represented as answer sets, each being a set of atoms representing a solution. The declarative approach of ASP allows a user to concentrate on a problem's specification rather than the computational means to solve it. This makes ASP a prime candidate for rapid prototyping and an attractive tool for teaching key AI techniques since complex problems can be expressed in a succinct and elaboration-tolerant way. This is eased by the tuning of ASP's modeling language to knowledge representation and reasoning (KRR). The resulting impact is nicely reflected by a growing range of successful applications of ASP [Erdem et al. AI Mag 37(3):53-68, 2016; Falkner et al. Industrial applications of answer set programming.
K{\"u}nstliche Intelligenz (2018)]}, language = {en} } @misc{SchaubWoltran2018a, author = {Schaub, Torsten H. and Woltran, Stefan}, title = {Special issue on answer set programming}, series = {K{\"u}nstliche Intelligenz}, volume = {32}, journal = {K{\"u}nstliche Intelligenz}, number = {2-3}, publisher = {Springer}, address = {Heidelberg}, issn = {0933-1875}, doi = {10.1007/s13218-018-0554-8}, pages = {101 -- 103}, year = {2018}, language = {en} } @phdthesis{Sawade2012, author = {Sawade, Christoph}, title = {Active evaluation of predictive models}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-255-1}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-65583}, school = {Universit{\"a}t Potsdam}, pages = {ix, 157}, year = {2012}, abstract = {The field of machine learning studies algorithms that infer predictive models from data. Predictive models are applicable for many practical tasks such as spam filtering, face and handwritten digit recognition, and personalized product recommendation. In general, they are used to predict a target label for a given data instance. In order to make an informed decision about the deployment of a predictive model, it is crucial to know the model's approximate performance. To evaluate performance, a set of labeled test instances is required that is drawn from the distribution the model will be exposed to at application time. In many practical scenarios, unlabeled test instances are readily available, but the process of labeling them can be a time- and cost-intensive task and may involve a human expert. This thesis addresses the problem of evaluating a given predictive model accurately with minimal labeling effort. We study an active model evaluation process that selects certain instances of the data according to an instrumental sampling distribution and queries their labels. We derive sampling distributions that minimize estimation error with respect to different performance measures such as error rate, mean squared error, and F-measures. An analysis of the distribution that governs the estimator leads to confidence intervals, which indicate how precise the error estimation is. Labeling costs may vary across different instances depending on certain characteristics of the data. For instance, documents differ in their length, comprehensibility, and technical requirements; these attributes affect the time a human labeler needs to judge relevance or to assign topics. To address this, the sampling distribution is extended to incorporate instance-specific costs. We empirically study conditions under which the active evaluation processes are more accurate than a standard estimate that draws equally many instances from the test distribution. We also address the problem of comparing the risks of two predictive models. The standard approach would be to draw instances according to the test distribution, label the selected instances, and apply statistical tests to identify significant differences. Drawing instances according to an instrumental distribution affects the power of a statistical test. We derive a sampling procedure that maximizes test power when used to select instances, and thereby minimizes the likelihood of choosing the inferior model. Furthermore, we investigate the task of comparing several alternative models; the objective of an evaluation could be to rank the models according to the risk that they incur or to identify the model with lowest risk.
An experimental study shows that the active procedure leads to higher test power than the standard test in many application domains. Finally, we study the problem of evaluating the performance of ranking functions, which are used for example for web search. In practice, ranking performance is estimated by applying a given ranking model to a representative set of test queries and manually assessing the relevance of all retrieved items for each query. We apply the concepts of active evaluation and active comparison to ranking functions and derive optimal sampling distributions for the commonly used performance measures Discounted Cumulative Gain and Expected Reciprocal Rank. Experiments on web search engine data illustrate significant reductions in labeling costs.}, language = {en} } @phdthesis{Sapegin2018, author = {Sapegin, Andrey}, title = {High-Speed Security Log Analytics Using Hybrid Outlier Detection}, doi = {10.25932/publishup-42611}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-426118}, school = {Universit{\"a}t Potsdam}, pages = {162}, year = {2018}, abstract = {The rapid development and integration of Information Technologies over the last decades have influenced all areas of our lives, including the business world. Yet not only do modern enterprises become digitalised; security and criminal threats also move into the digital sphere. To withstand these threats, modern companies must be aware of all activities within their computer networks. The keystone for such continuous security monitoring is a Security Information and Event Management (SIEM) system that collects and processes all security-related log messages from the entire enterprise network. However, digital transformations and technologies, such as network virtualisation and the widespread usage of mobile communications, lead to a constantly increasing number of monitored devices and systems. As a result, the amount of data that has to be processed by a SIEM system is increasing rapidly. Besides that, in-depth security analysis of the captured data requires the application of rather sophisticated outlier detection algorithms that have a high computational complexity. Existing outlier detection methods often suffer from performance issues and are not directly applicable for high-speed and high-volume analysis of heterogeneous security-related events, which has become a major challenge for modern SIEM systems. This thesis provides a number of solutions for the mentioned challenges. First, it proposes a new SIEM system architecture for high-speed processing of security events, implementing parallel, in-memory and in-database processing principles. The proposed architecture also utilises the most efficient log format for high-speed data normalisation. Next, the thesis offers several novel high-speed outlier detection methods, including a generic Hybrid Outlier Detection that can efficiently be used for Big Data analysis. Finally, a special User Behaviour Outlier Detection is proposed for better threat detection and analysis of particular user behaviour cases. The proposed architecture and methods were evaluated in terms of both performance and accuracy, as well as compared with a classical architecture and existing algorithms. These evaluations were performed on multiple data sets, including simulated data, a well-known public intrusion detection data set, and real data from a large multinational enterprise. The evaluation results have proved the high performance and efficacy of the developed methods.
All concepts proposed in this thesis were integrated into the prototype of the SIEM system, capable of high-speed analysis of Big Security Data, which makes this integrated SIEM platform highly relevant for modern enterprise security applications.}, language = {en} } @phdthesis{Saleh2016, author = {Saleh, Eyad}, title = {Securing Multi-tenant SaaS Environments}, school = {Universit{\"a}t Potsdam}, pages = {108}, year = {2016}, abstract = {Software-as-a-Service (SaaS) offers several advantages to both service providers and users. Service providers can benefit from the reduction of Total Cost of Ownership (TCO), better scalability, and better resource utilization. On the other hand, users can use the service anywhere and anytime, and minimize upfront investment by following the pay-as-you-go model. Despite the benefits of SaaS, users still have concerns about the security and privacy of their data. Due to the nature of SaaS and the Cloud in general, the data and the computation are beyond the users' control, and hence data security becomes a vital factor in this new paradigm. Furthermore, in multi-tenant SaaS applications, the tenants become more concerned about the confidentiality of their data since several tenants are co-located on a shared infrastructure. To address those concerns, we start protecting the data from the provisioning process by controlling how tenants are placed in the infrastructure. We present SecPlace, a resource allocation algorithm designed to minimize the risk of co-resident tenants. It enables the SaaS provider to control the resource (i.e., database instance) allocation process while taking into account the security of tenants as a requirement. Due to the design principles of the multi-tenancy model, tenants follow some degree of sharing on both the application and infrastructure levels. Thus, strong security isolation should be present. Therefore, we develop SignedQuery, a technique that prevents one tenant from accessing others' data. We use the signing concept to create a signature that is used to sign the tenant's request; the server then verifies the signature and recognizes the requesting tenant, and hence ensures that the data to be accessed belongs to the legitimate tenant. Finally, data confidentiality remains a critical concern due to the fact that data in the Cloud is outside users' premises, and hence beyond their control. Cryptography is increasingly proposed as a potential approach to address such a challenge. Therefore, we present SecureDB, a system designed to run SQL-based applications over an encrypted database. SecureDB captures the schema design and analyzes it to understand the internal structure of the data (i.e., relationships between the tables and their attributes). Moreover, we determine the appropriate partially homomorphic encryption scheme for each attribute such that computation is possible even when the data is encrypted. To evaluate our work, we conduct extensive experiments with different settings. The main use case in our work is a popular open-source HRM application called OrangeHRM.
The results show that our multi-layered approach is practical, provides enhanced security and isolation among tenants, and has moderate complexity in terms of processing encrypted data.}, language = {en} } @phdthesis{Sakizloglou2023, author = {Sakizloglou, Lucas}, title = {Evaluating temporal queries over history-aware architectural runtime models}, doi = {10.25932/publishup-60439}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-604396}, school = {Universit{\"a}t Potsdam}, pages = {v, 168}, year = {2023}, abstract = {In model-driven engineering, the adaptation of large software systems with dynamic structure is enabled by architectural runtime models. Such a model represents an abstract state of the system as a graph of interacting components. Every relevant change in the system is mirrored in the model and triggers an evaluation of model queries, which search the model for structural patterns that should be adapted. This thesis focuses on a type of runtime model where the expressiveness of the model and model queries is extended to capture past changes and their timing. These history-aware models and temporal queries enable more informed decision-making during adaptation, as they support the formulation of requirements on the evolution of the pattern that should be adapted. However, evaluating temporal queries during adaptation poses significant challenges. First, it implies the capability to specify and evaluate requirements on the structure, as well as the ordering and timing in which structural changes occur. Then, query answers have to reflect that the history-aware model represents the architecture of a system whose execution may be ongoing, and thus answers may depend on future changes. Finally, query evaluation needs to be adequately fast and memory-efficient despite the increasing size of the history---especially for models that are altered by numerous, rapid changes. The thesis presents a query language and a querying approach for the specification and evaluation of temporal queries. These contributions aim to cope with the challenges of evaluating temporal queries at runtime, a prerequisite for history-aware architectural monitoring and adaptation which has not been systematically treated by prior model-based solutions. The distinguishing features of our contributions are: the specification of queries based on a temporal logic which encodes structural patterns as graphs; the provision of formally precise query answers which account for timing constraints and ongoing executions; the incremental evaluation which avoids the re-computation of query answers after each change; and the option to discard history that is no longer relevant to queries. The query evaluation searches the model for occurrences of a pattern whose evolution satisfies a temporal logic formula. Therefore, besides model-driven engineering, another related research community is runtime verification. The approach differs from prior logic-based runtime verification solutions by supporting the representation and querying of structure via graphs and graph queries, respectively, which is more efficient for queries with complex patterns. We present a prototypical implementation of the approach and measure its speed and memory consumption in monitoring and adaptation scenarios from two application domains, with executions of increasing size. We assess scalability by a comparison to the state-of-the-art from both related research communities.
The implementation yields promising results, which pave the way for sophisticated history-aware self-adaptation solutions and indicate that the approach constitutes a highly effective technique for runtime monitoring on an architectural level.}, language = {en} } @article{Saito2015, author = {Saito, Toshinori}, title = {The Key Competencies in Informatics and ICT viewed from Nussbaum's Ten Central Capabilities}, series = {KEYCIT 2014 - Key Competencies in Informatics and ICT}, journal = {KEYCIT 2014 - Key Competencies in Informatics and ICT}, number = {7}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, issn = {1868-0844}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-82718}, pages = {253 -- 266}, year = {2015}, abstract = {This article presents a discussion of the key competencies in informatics and ICT viewed from a philosophical foundation presented by Martha Nussbaum, known as the 'ten central capabilities'. Firstly, the outline of 'The Capability Approach', which has been presented by Amartya Sen and Nussbaum as a theoretical framework for assessing the state of social welfare, will be explained. Secondly, the body of Nussbaum's ten central capabilities and the reasons for applying them as the basis of discussion will be shown. Thirdly, the relationship between the concepts of 'capability' and 'competency' will be discussed. After that, the author's assumption of the key competencies in informatics and ICT, derived from the examination of Nussbaum's ten capabilities, will be presented.}, language = {en} } @phdthesis{SadrAzodi2015, author = {Sadr-Azodi, Amir Shahab}, title = {Towards Real-time SIEM-based Network monitoring and Intrusion Detection through Advanced Event Normalization}, school = {Universit{\"a}t Potsdam}, pages = {144}, year = {2015}, language = {en} } @article{RosinLaiMouldetal.2022, author = {Rosin, Paul L. and Lai, Yu-Kun and Mould, David and Yi, Ran and Berger, Itamar and Doyle, Lars and Lee, Seungyong and Li, Chuan and Liu, Yong-Jin and Semmo, Amir and Shamir, Ariel and Son, Minjung and Winnem{\"o}ller, Holger}, title = {NPRportrait 1.0: A three-level benchmark for non-photorealistic rendering of portraits}, series = {Computational visual media}, volume = {8}, journal = {Computational visual media}, number = {3}, publisher = {Springer Nature}, address = {London}, issn = {2096-0433}, doi = {10.1007/s41095-021-0255-3}, pages = {445 -- 465}, year = {2022}, abstract = {Recently, there has been an upsurge of activity in image-based non-photorealistic rendering (NPR), and in particular portrait image stylisation, due to the advent of neural style transfer (NST). However, the state of performance evaluation in this field is poor, especially compared to the norms in the computer vision and machine learning communities. Unfortunately, the task of evaluating image stylisation is thus far not well defined, since it involves subjective, perceptual, and aesthetic aspects. To make progress towards a solution, this paper proposes a new structured, three-level, benchmark dataset for the evaluation of stylised portrait images. Rigorous criteria were used for its construction, and its consistency was validated by user studies. Moreover, a new methodology has been developed for evaluating portrait stylisation algorithms, which makes use of the different benchmark levels as well as annotations provided by user studies regarding the characteristics of the faces.
@article{Saito2015, author = {Saito, Toshinori}, title = {The Key Competencies in Informatics and ICT viewed from Nussbaum's Ten Central Capabilities}, series = {KEYCIT 2014 - Key Competencies in Informatics and ICT}, journal = {KEYCIT 2014 - Key Competencies in Informatics and ICT}, number = {7}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, issn = {1868-0844}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-82718}, pages = {253 -- 266}, year = {2015}, abstract = {This article discusses the key competencies in informatics and ICT viewed from a philosophical foundation presented by Martha Nussbaum, known as the 'ten central capabilities'. Firstly, the outline of 'The Capability Approach', which has been presented by Amartya Sen and Nussbaum as a theoretical framework for assessing the state of social welfare, will be explained. Secondly, the body of Nussbaum's ten central capabilities and the reasons for applying them as the basis of the discussion will be shown. Thirdly, the relationship between the concepts of 'capability' and 'competency' will be discussed. After that, the author's view of the key competencies in informatics and ICT derived from the examination of Nussbaum's ten capabilities will be presented.}, language = {en} } @phdthesis{SadrAzodi2015, author = {Sadr-Azodi, Amir Shahab}, title = {Towards Real-time SIEM-based Network monitoring and Intrusion Detection through Advanced Event Normalization}, school = {Universit{\"a}t Potsdam}, pages = {144}, year = {2015}, language = {en} } @article{RosinLaiMouldetal.2022, author = {Rosin, Paul L. and Lai, Yu-Kun and Mould, David and Yi, Ran and Berger, Itamar and Doyle, Lars and Lee, Seungyong and Li, Chuan and Liu, Yong-Jin and Semmo, Amir and Shamir, Ariel and Son, Minjung and Winnem{\"o}ller, Holger}, title = {NPRportrait 1.0: A three-level benchmark for non-photorealistic rendering of portraits}, series = {Computational visual media}, volume = {8}, journal = {Computational visual media}, number = {3}, publisher = {Springer Nature}, address = {London}, issn = {2096-0433}, doi = {10.1007/s41095-021-0255-3}, pages = {445 -- 465}, year = {2022}, abstract = {Recently, there has been an upsurge of activity in image-based non-photorealistic rendering (NPR), and in particular portrait image stylisation, due to the advent of neural style transfer (NST). However, the state of performance evaluation in this field is poor, especially compared to the norms in the computer vision and machine learning communities. Unfortunately, the task of evaluating image stylisation is thus far not well defined, since it involves subjective, perceptual, and aesthetic aspects. To make progress towards a solution, this paper proposes a new structured, three-level benchmark dataset for the evaluation of stylised portrait images. Rigorous criteria were used for its construction, and its consistency was validated by user studies. Moreover, a new methodology has been developed for evaluating portrait stylisation algorithms, which makes use of the different benchmark levels as well as annotations provided by user studies regarding the characteristics of the faces. We perform an evaluation of a wide variety of image stylisation methods (both portrait-specific and general-purpose, covering both traditional NPR approaches and NST) using the new benchmark dataset.}, language = {en} } @article{RoostapourNeumannNeumannetal.2022, author = {Roostapour, Vahid and Neumann, Aneta and Neumann, Frank and Friedrich, Tobias}, title = {Pareto optimization for subset selection with dynamic cost constraints}, series = {Artificial intelligence}, volume = {302}, journal = {Artificial intelligence}, publisher = {Elsevier}, address = {Amsterdam}, issn = {0004-3702}, doi = {10.1016/j.artint.2021.103597}, pages = {17}, year = {2022}, abstract = {We consider the subset selection problem for a function f with a constraint bound B that changes over time. Within the area of submodular optimization, various greedy approaches are commonly used. For dynamic environments, we observe that the adaptive variants of these greedy approaches are not able to maintain their approximation quality. Investigating the recently introduced POMC Pareto optimization approach, we show that this algorithm efficiently computes a $\phi = (\alpha_f/2)(1 - 1/e^{\alpha_f})$-approximation, where $\alpha_f$ is the submodularity ratio of f, for each possible constraint bound $b \leq B$. Furthermore, we show that POMC is able to adapt its set of solutions quickly in the case that B increases. Our experimental investigations for influence maximization in social networks show the advantage of POMC over generalized greedy algorithms. We also consider EAMC, a new evolutionary algorithm with a polynomial expected time guarantee to maintain a $\phi$-approximation ratio, and NSGA-II with two different population sizes as an advanced multi-objective optimization algorithm, to demonstrate their challenges in optimizing the maximum coverage problem. Our empirical analysis shows that, within the same number of evaluations, POMC is able to perform as well as NSGA-II under a linear constraint, while EAMC performs significantly worse than all considered algorithms in most cases.}, language = {en} }
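Restated as display math, the approximation guarantee quoted in the Roostapour et al. entry above reads as follows; this is a typographic reconstruction from the abstract's notation, and the symbols $X_b$ and $\mathrm{OPT}_b$ are names introduced here for illustration only:

\[
f(X_b) \;\geq\; \frac{\alpha_f}{2}\left(1 - \frac{1}{e^{\alpha_f}}\right)\mathrm{OPT}_b
\qquad \text{for each constraint bound } b \leq B,
\]

where $\alpha_f$ is the submodularity ratio of $f$, $X_b$ denotes the solution maintained by POMC for bound $b$, and $\mathrm{OPT}_b$ the optimal objective value under that bound.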
@article{RojahnWeberGronau2023, author = {Rojahn, Marcel and Weber, Edzard and Gronau, Norbert}, title = {Towards a standardization in scheduling models}, series = {International journal of industrial and systems engineering}, volume = {17}, journal = {International journal of industrial and systems engineering}, number = {6}, publisher = {Inderscience Enterprises}, address = {Gen{\`e}ve}, issn = {1748-5037}, pages = {401 -- 408}, year = {2023}, abstract = {Terminology is a critical instrument for each researcher. Different terminologies for the same research object may arise in different research communities. Through this inconsistency, many synergistic effects are lost. Theories and models will be more understandable and reusable if a common terminology is applied. This paper examines the terminological (in)consistency in the research field of job-shop scheduling by means of a literature review. There is an enormous variety in the choice of terms and mathematical notation for the same concept. The comparability, reusability, and combinability of scheduling methods are unnecessarily hampered by the arbitrary use of homonyms and synonyms. The acceptance in the community of the variables and notation forms in use is shown by means of a compliance quotient. This is demonstrated by the evaluation of 240 scientific publications on planning methods.}, language = {en} } @inproceedings{RojahnGronau2023, author = {Rojahn, Marcel and Gronau, Norbert}, title = {Digital platform concepts for manufacturing companies}, series = {10th International Conference on Future Internet of Things and Cloud (FiCloud)}, booktitle = {10th International Conference on Future Internet of Things and Cloud (FiCloud)}, publisher = {IEEE}, address = {[Erscheinungsort nicht ermittelbar]}, isbn = {979-8-3503-1635-3}, doi = {10.1109/FiCloud58648.2023.00030}, pages = {149 -- 158}, year = {2023}, abstract = {Digital Platforms (DPs) have established themselves in recent years as a central concept of information technology science. Due to the great diversity of digital platform concepts, clear definitions are still required. Furthermore, DPs are subject to dynamic changes from internal and external factors, which pose challenges for digital platform operators, developers, and customers. Which current digital platform research directions should be taken to address these challenges has so far remained open. The following paper aims to contribute to this by outlining a systematic literature review (SLR) on digital platform concepts in the context of the Industrial Internet of Things (IIoT) for manufacturing companies and provides a basis for (1) a selection of definitions of current digital platform and ecosystem concepts and (2) a selection of current digital platform research directions. These directions are divided into (a) occurrence of digital platforms, (b) emergence of digital platforms, (c) evaluation of digital platforms, (d) development of digital platforms, and (e) selection of digital platforms.}, language = {en} } @inproceedings{RojahnGronau2024, author = {Rojahn, Marcel and Gronau, Norbert}, title = {Openness indicators for the evaluation of digital platforms between the launch and maturity phase}, series = {Proceedings of the 57th Annual Hawaii International Conference on System Sciences}, booktitle = {Proceedings of the 57th Annual Hawaii International Conference on System Sciences}, editor = {Bui, Tung X.}, publisher = {Department of IT Management Shidler College of Business University of Hawaii}, address = {Honolulu, HI}, isbn = {978-0-99813-317-1}, pages = {4516 -- 4525}, year = {2024}, abstract = {In recent years, the evaluation of digital platforms has become an important focus in the field of information systems science. The identification of influential indicators that drive changes in digital platforms, specifically those related to openness, is still an unresolved issue. This paper addresses the challenge of identifying measurable indicators and characterizing the transition from launch to maturity in digital platforms. It proposes a systematic analytical approach to identify relevant openness indicators for evaluation purposes.
The main contributions of this study are the following: (1) the development of a comprehensive procedure for analyzing indicators, (2) the categorization of indicators as evaluation metrics within a multidimensional grid-box model, (3) the selection and evaluation of relevant indicators, (4) the identification and assessment of digital platform architectures during the launch-to-maturity transition, and (5) the evaluation of the applicability of the conceptualization and design process for digital platform evaluation.}, language = {en} } @incollection{RojahnAmbrosBiruetal.2023, author = {Rojahn, Marcel and Ambros, Maximilian and Biru, Tibebu and Krallmann, Hermann and Gronau, Norbert and Grum, Marcus}, title = {Adequate basis for the data-driven and machine-learning-based identification}, series = {Artificial intelligence and soft computing}, booktitle = {Artificial intelligence and soft computing}, editor = {Rutkowski, Leszek and Scherer, Rafał and Korytkowski, Marcin and Pedrycz, Witold and Tadeusiewicz, Ryszard and Zurada, Jacek M.}, publisher = {Springer}, address = {Cham}, isbn = {978-3-031-42504-2}, doi = {10.1007/978-3-031-42505-9_48}, pages = {570 -- 588}, year = {2023}, abstract = {Process mining (PM) has established itself in recent years as a main method for visualizing and analyzing processes. However, the identification of knowledge has not been addressed adequately because PM aims solely at the data-driven discovery, monitoring, and improvement of real-world processes from event logs available in various information systems. The following paper, therefore, outlines a novel systematic analysis view on tools for data-driven and machine learning (ML)-based identification of knowledge-intensive target processes. To support the effectiveness of the identification process, the main contributions of this study are (1) to design a procedure for a systematic review and analysis for the selection of relevant dimensions, (2) to identify different categories of dimensions as evaluation metrics to select source systems, algorithms, and tools for PM and ML as well as include them in a multi-dimensional grid box model, (3) to select and assess the most relevant dimensions of the model, (4) to identify and assess source systems, algorithms, and tools in order to find evidence for the selected dimensions, and (5) to assess the relevance and applicability of the conceptualization and design procedure for tool selection in data-driven and ML-based process mining research.}, language = {en} } @phdthesis{Rohloff2021, author = {Rohloff, Tobias}, title = {Learning analytics at scale}, doi = {10.25932/publishup-52623}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-526235}, school = {Universit{\"a}t Potsdam}, pages = {xvii, 138, lxvii}, year = {2021}, abstract = {Digital technologies are paving the way for innovative educational approaches. The learning format of Massive Open Online Courses (MOOCs) provides a highly accessible path to lifelong learning while being more affordable and flexible than face-to-face courses. As a result, thousands of learners can enroll in courses, mostly without admission restrictions, but this also raises challenges. Individual supervision by teachers is barely feasible, and learning persistence and success depend on students' self-regulatory skills. Here, technology provides the means for support. The use of data for decision-making is already transforming many fields, whereas in education, it is still a young research discipline.
Learning Analytics (LA) is defined as the measurement, collection, analysis, and reporting of data about learners and their learning contexts with the purpose of understanding and improving learning and learning environments. The vast amount of data that MOOCs produce on the learning behavior and success of thousands of students provides the opportunity to study human learning and develop approaches addressing the demands of learners and teachers. The overall purpose of this dissertation is to investigate the implementation of LA at the scale of MOOCs and to explore how data-driven technology can support learning and teaching in this context. To this end, several research prototypes have been iteratively developed for the HPI MOOC Platform. Hence, they were tested and evaluated in an authentic, real-world learning environment. Most of the results can be applied on a conceptual level to other MOOC platforms as well. The research contribution of this thesis thus provides practical insights that go beyond purely theoretical considerations. In total, four system components were developed and extended: (1) The Learning Analytics Architecture: A technical infrastructure to collect, process, and analyze event-driven learning data based on schema-agnostic pipelining in a service-oriented MOOC platform. (2) The Learning Analytics Dashboard for Learners: A tool for data-driven support of self-regulated learning, in particular to enable learners to evaluate and plan their learning activities, progress, and success by themselves. (3) Personalized Learning Objectives: A set of features to better connect learners' success to their personal intentions based on selected learning objectives, offering guidance and aligning the provided data-driven insights about their learning progress. (4) The Learning Analytics Dashboard for Teachers: A tool supporting teachers with data-driven insights to enable the monitoring of their courses with thousands of learners, identify potential issues, and take informed action. For all aspects examined in this dissertation, related research is presented, development processes and implementation concepts are explained, and evaluations are conducted in case studies. Among other findings, the usage of the learner dashboard in combination with personalized learning objectives demonstrated improved certification rates of 11.62\% to 12.63\%. Furthermore, it was observed that the teacher dashboard is a key tool and an integral part of teaching in MOOCs. In addition to the results and contributions, general limitations of the work are discussed; altogether, this provides a solid foundation for practical implications and future research.}, language = {en} } @book{RoggeSoltiMansvanderAalstetal.2013, author = {Rogge-Solti, Andreas and Mans, Ronny S. and van der Aalst, Wil M. P. and Weske, Mathias}, title = {Repairing event logs using stochastic process models}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-258-2}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-66797}, publisher = {Universit{\"a}t Potsdam}, pages = {19}, year = {2013}, abstract = {Companies strive to improve their business processes in order to remain competitive. Process mining aims to infer meaningful insights from process-related data and has attracted the attention of practitioners, tool vendors, and researchers in recent years. Traditionally, event logs are assumed to describe the as-is situation.
This assumption, however, does not necessarily hold in environments where events are logged manually. For example, hospital staff may need to manually enter information regarding a patient's treatment. As a result, events or timestamps may be missing or incorrect. In this paper, we make use of process knowledge captured in process models and provide a method to repair missing events in the logs. This way, we facilitate the analysis of incomplete logs. We realize the repair by combining stochastic Petri nets, alignments, and Bayesian networks. We evaluate the results using both synthetic data and real event data from a Dutch hospital.}, language = {en} } @phdthesis{RoggeSolti2014, author = {Rogge-Solti, Andreas}, title = {Probabilistic Estimation of Unobserved Process Events}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-70426}, school = {Universit{\"a}t Potsdam}, year = {2014}, abstract = {Organizations try to gain competitive advantages and to increase customer satisfaction. To ensure the quality and efficiency of their business processes, they perform business process management. An important part of process management that happens on the daily operational level is process controlling. A prerequisite of controlling is process monitoring, i.e., keeping track of the performed activities in running process instances. Only by process monitoring can business analysts detect delays and react to deviations from the expected or guaranteed performance of a process instance. To enable monitoring, process events need to be collected from the process environment. When a business process is orchestrated by a process execution engine, monitoring is available for all orchestrated process activities. Many business processes, however, do not lend themselves to automatic orchestration, e.g., because of required freedom of action. This situation is often encountered in hospitals, where most business processes are manually enacted. Hence, in practice it is often inefficient or infeasible to document and monitor every process activity. Additionally, manual process execution and documentation are prone to errors, e.g., the documentation of activities can be forgotten. Thus, organizations face the challenge of process events that occur but are not observed by the monitoring environment. These unobserved process events can serve as a basis for operational process decisions, even without exact knowledge of when they happened or when they will happen. An exemplary decision is whether to invest more resources to manage timely completion of a case, anticipating that the process end event will occur too late. This thesis offers means to reason about unobserved process events in a probabilistic way. We address decisive questions of process managers (e.g., "when will the case be finished?" or "when did we perform the activity that we forgot to document?") in this thesis. As our main contribution, we introduce an advanced probabilistic model to business process management that is based on a stochastic variant of Petri nets. We present a holistic approach to use the model effectively along the business process lifecycle. Therefore, we provide techniques to discover such models from historical observations, to predict the termination time of processes, and to ensure quality by missing-data management. We propose mechanisms to optimize the configuration for monitoring and prediction, i.e., to offer guidance in selecting important activities to monitor. An implementation is provided as a proof of concept.
For evaluation, we compare the accuracy of the approach with that of state-of-the-art approaches using real process data from a hospital. Additionally, we show its more general applicability in other domains by applying the approach to process data from logistics and finance.}, language = {en} } @misc{RitterbuschTeichmann2023, author = {Ritterbusch, Georg David and Teichmann, Malte Rolf}, title = {Defining the metaverse}, series = {Zweitver{\"o}ffentlichungen der Universit{\"a}t Potsdam : Wirtschafts- und Sozialwissenschaftliche Reihe}, journal = {Zweitver{\"o}ffentlichungen der Universit{\"a}t Potsdam : Wirtschafts- und Sozialwissenschaftliche Reihe}, number = {159}, issn = {1867-5808}, doi = {10.25932/publishup-58879}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-588799}, pages = {12368 -- 12377}, year = {2023}, abstract = {The term Metaverse is emerging as a result of the recent push by multinational technology conglomerates and a surge of interest in Web 3.0, Blockchain, NFT, and Cryptocurrencies. From a scientific point of view, there is no definite consensus on what the Metaverse will be like. This paper collects, analyzes, and synthesizes scientific definitions and the accompanying major characteristics of the Metaverse using the methodology of a Systematic Literature Review (SLR). Two revised definitions for the Metaverse are presented, both condensing the key attributes, where the first one is rather simplistic and holistic, describing "a three-dimensional online environment in which users represented by avatars interact with each other in virtual spaces decoupled from the real physical world". In contrast, the second definition is specified in a more detailed manner in the paper and further discussed. These comprehensive definitions offer specialized and general scholars a point of application within and beyond the scientific context of systems science, information systems science, computer science, and business informatics, while also introducing open research challenges. Furthermore, an outlook on the social, economic, and technical implications is given, and the preconditions that are necessary for a successful implementation are discussed.}, language = {en} } @article{RitterbuschTeichmann2023, author = {Ritterbusch, Georg David and Teichmann, Malte Rolf}, title = {Defining the metaverse}, series = {IEEE Access}, volume = {11}, journal = {IEEE Access}, publisher = {Institute of Electrical and Electronics Engineers}, address = {New York, NY}, issn = {2169-3536}, doi = {10.1109/ACCESS.2023.3241809}, pages = {12368 -- 12377}, year = {2023}, abstract = {The term Metaverse is emerging as a result of the recent push by multinational technology conglomerates and a surge of interest in Web 3.0, Blockchain, NFT, and Cryptocurrencies. From a scientific point of view, there is no definite consensus on what the Metaverse will be like. This paper collects, analyzes, and synthesizes scientific definitions and the accompanying major characteristics of the Metaverse using the methodology of a Systematic Literature Review (SLR). Two revised definitions for the Metaverse are presented, both condensing the key attributes, where the first one is rather simplistic and holistic, describing "a three-dimensional online environment in which users represented by avatars interact with each other in virtual spaces decoupled from the real physical world". In contrast, the second definition is specified in a more detailed manner in the paper and further discussed.
These comprehensive definitions offer specialized and general scholars a point of application within and beyond the scientific context of systems science, information systems science, computer science, and business informatics, while also introducing open research challenges. Furthermore, an outlook on the social, economic, and technical implications is given, and the preconditions that are necessary for a successful implementation are discussed.}, language = {en} } @phdthesis{Risch2020, author = {Risch, Julian}, title = {Reader comment analysis on online news platforms}, doi = {10.25932/publishup-48922}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-489222}, school = {Universit{\"a}t Potsdam}, pages = {xi, 135}, year = {2020}, abstract = {Comment sections of online news platforms are an essential space to express opinions and discuss political topics. However, the misuse by spammers, haters, and trolls raises doubts about whether the benefits justify the costs of the time-consuming content moderation. As a consequence, many platforms have limited or even shut down comment sections completely. In this thesis, we present deep learning approaches for comment classification, recommendation, and prediction to foster respectful and engaging online discussions. The main focus is on two kinds of comments: toxic comments, which make readers leave a discussion, and engaging comments, which make readers join a discussion. First, we discourage and remove toxic comments, e.g., insults or threats. To this end, we present a semi-automatic comment moderation process, which is based on fine-grained text classification models and supports moderators. Our experiments demonstrate that data augmentation, transfer learning, and ensemble learning allow training robust classifiers even on small datasets. To establish trust in the machine-learned models, we reveal which input features are decisive for their output with attribution-based explanation methods. Second, we encourage and highlight engaging comments, e.g., serious questions or factual statements. We automatically identify the most engaging comments, so that readers need not scroll through thousands of comments to find them. The model training process builds on upvotes and replies as a measure of reader engagement. We also identify comments that address the article authors or are otherwise relevant to them to support interactions between journalists and their readership. Taking into account the readers' interests, we further provide personalized recommendations of discussions that align with their favored topics or involve frequent co-commenters. Our models outperform multiple baselines and recent related work in experiments on comment datasets from different platforms.}, language = {en} } @phdthesis{Richter2018, author = {Richter, Rico}, title = {Concepts and techniques for processing and rendering of massive 3D point clouds}, doi = {10.25932/publishup-42330}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-423304}, school = {Universit{\"a}t Potsdam}, pages = {v, 131}, year = {2018}, abstract = {Remote sensing technologies, such as airborne, mobile, or terrestrial laser scanning, and photogrammetric techniques are fundamental approaches for efficient, automatic creation of digital representations of spatial environments. For example, they allow us to generate 3D point clouds of landscapes, cities, infrastructure networks, and sites.
As an essential and universal category of geodata, 3D point clouds are used and processed by a growing number of applications, services, and systems in domains such as urban planning, landscape architecture, environmental monitoring, disaster management, and virtual geographic environments, as well as for spatial analysis and simulation. While the acquisition processes for 3D point clouds become more and more reliable and widely used, applications and systems are faced with more and more 3D point cloud data. In addition, 3D point clouds, by their very nature, are raw data, i.e., they do not contain any structural or semantic information. Many processing strategies common to GIS, such as deriving polygon-based 3D models, generally do not scale to billions of points. GIS typically reduce data density and precision of 3D point clouds to cope with the sheer amount of data, but that results in a significant loss of valuable information at the same time. This thesis proposes concepts and techniques designed to efficiently store and process massive 3D point clouds. To this end, object-class segmentation approaches are presented to attribute semantics to 3D point clouds, used, for example, to identify building, vegetation, and ground structures and, thus, to enable processing, analyzing, and visualizing 3D point clouds in a more effective and efficient way. Similarly, change detection and updating strategies for 3D point clouds are introduced that allow for reducing storage requirements and incrementally updating 3D point cloud databases. In addition, this thesis presents out-of-core, real-time rendering techniques used to interactively explore 3D point clouds and related analysis results. All techniques have been implemented based on specialized spatial data structures, out-of-core algorithms, and GPU-based processing schemes to cope with massive 3D point clouds having billions of points. All proposed techniques have been evaluated and have demonstrated their applicability to the field of geospatial applications and systems, in particular for tasks such as classification, processing, and visualization. Case studies for 3D point clouds of entire cities with up to 80 billion points show that the presented approaches open up new ways to manage and apply large-scale, dense, and time-variant 3D point clouds as required by a rapidly growing number of applications and systems.}, language = {en} } @article{RichlySchlosserBoissier2022, author = {Richly, Keven and Schlosser, Rainer and Boissier, Martin}, title = {Budget-conscious fine-grained configuration optimization for spatio-temporal applications}, series = {Proceedings of the VLDB Endowment}, volume = {15}, journal = {Proceedings of the VLDB Endowment}, number = {13}, publisher = {Association for Computing Machinery (ACM)}, address = {[New York]}, issn = {2150-8097}, doi = {10.14778/3565838.3565858}, pages = {4079 -- 4092}, year = {2022}, abstract = {Based on the performance requirements of modern spatio-temporal data mining applications, in-memory database systems are often used to store and process the data. To efficiently utilize the scarce DRAM capacities, modern database systems support various tuning possibilities to reduce the memory footprint (e.g., data compression) or increase performance (e.g., additional indexes). However, the selection of cost- and performance-balancing configurations is challenging due to the vast number of possible setups consisting of mutually dependent individual decisions.
In this paper, we introduce a novel approach to jointly optimize the compression, sorting, indexing, and tiering configuration for spatio-temporal workloads. Further, we consider horizontal data partitioning, which enables the independent application of different tuning options on a fine-grained level. We propose different linear programming (LP) models addressing cost dependencies at different levels of accuracy to compute optimized tuning configurations for a given workload and memory budgets. To yield maintainable and robust configurations, we extend our LP-based approach to incorporate reconfiguration costs as well as a worst-case optimization for potential workload scenarios. Further, we demonstrate on a real-world dataset that our models allow us to significantly reduce the memory footprint with equal performance or increase the performance with equal memory size compared to existing tuning heuristics.}, language = {en} }
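To make the flavor of such LP-based configuration selection concrete, the following minimal Python sketch (using the PuLP modeling library) picks exactly one tuning option per table partition so that an estimated runtime cost is minimized under a memory budget. All option names and numbers are invented toy values, not the cost models of the entries above.

# Toy linear program in the spirit of budget-constrained configuration
# selection: one (compression/index) option per partition, minimal
# estimated runtime cost, subject to a memory budget.
from pulp import LpBinary, LpMinimize, LpProblem, LpVariable, lpSum, value

partitions = ["p1", "p2", "p3"]
options = ["uncompressed", "dictionary", "dictionary+index"]
mem  = {"uncompressed": 100, "dictionary": 40, "dictionary+index": 55}  # MB
cost = {"uncompressed": 8,   "dictionary": 12, "dictionary+index": 5}   # ms
budget = 160  # MB

prob = LpProblem("configuration_selection", LpMinimize)
x = {(p, o): LpVariable(f"x_{p}_{o}", cat=LpBinary)
     for p in partitions for o in options}

prob += lpSum(cost[o] * x[p, o] for p in partitions for o in options)
for p in partitions:  # exactly one option per partition
    prob += lpSum(x[p, o] for o in options) == 1
prob += lpSum(mem[o] * x[p, o] for p in partitions for o in options) <= budget

prob.solve()
for p in partitions:
    print(p, "->", next(o for o in options if value(x[p, o]) > 0.5))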
@phdthesis{Richly2024, author = {Richly, Keven}, title = {Memory-efficient data management for spatio-temporal applications}, doi = {10.25932/publishup-63547}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-635473}, school = {Universit{\"a}t Potsdam}, pages = {xii, 181}, year = {2024}, abstract = {The wide distribution of location-acquisition technologies means that large volumes of spatio-temporal data are continuously being accumulated. Positioning systems such as GPS enable the tracking of various moving objects' trajectories, which are usually represented by a chronologically ordered sequence of observed locations. The analysis of movement patterns based on detailed positional information creates opportunities for applications that can improve business decisions and processes in a broad spectrum of industries (e.g., transportation, traffic control, or medicine). Due to the large data volumes generated in these applications, the cost-efficient storage of spatio-temporal data is desirable, especially when in-memory database systems are used to achieve interactive performance requirements. To efficiently utilize the available DRAM capacities, modern database systems support various tuning possibilities to reduce the memory footprint (e.g., data compression) or increase performance (e.g., additional index structures). By considering horizontal data partitioning, we can independently apply different tuning options on a fine-grained level. However, the selection of cost- and performance-balancing configurations is challenging due to the vast number of possible setups consisting of mutually dependent individual decisions. In this thesis, we introduce multiple approaches to improve spatio-temporal data management by automatically optimizing diverse tuning options for the application-specific access patterns and data characteristics. Our contributions are as follows: (1) We introduce a novel approach to determine fine-grained table configurations for spatio-temporal workloads. Our linear programming (LP) approach jointly optimizes the (i) data compression, (ii) ordering, (iii) indexing, and (iv) tiering. We propose different models which address cost dependencies at different levels of accuracy to compute optimized tuning configurations for a given workload, memory budgets, and data characteristics. To yield maintainable and robust configurations, we further extend our LP-based approach to incorporate reconfiguration costs as well as optimizations for multiple potential workload scenarios. (2) To optimize the storage layout of timestamps in columnar databases, we present a heuristic approach for the workload-driven combined selection of a data layout and compression scheme. By considering attribute decomposition strategies, we are able to apply application-specific optimizations that reduce the memory footprint and improve performance. (3) We introduce an approach that leverages past trajectory data to improve the dispatch processes of transportation network companies. Based on location probabilities, we developed risk-averse dispatch strategies that reduce critical delays. (4) Finally, we used the use case of a transportation network company to evaluate our database optimizations on a real-world dataset. We demonstrate that workload-driven fine-grained optimizations allow us to reduce the memory footprint (by up to 71\% at equal performance) or increase the performance (by up to 90\% at equal memory size) compared to established rule-based heuristics. Individually, our contributions provide novel approaches to the current challenges in spatio-temporal data mining and database research. Combining them allows in-memory databases to store and process spatio-temporal data more cost-efficiently.}, language = {en} } @article{ReynoldsSwainstonBendrups2015, author = {Reynolds, Nicholas and Swainston, Andrew and Bendrups, Faye}, title = {Music Technology and Computational Thinking}, series = {KEYCIT 2014 - Key Competencies in Informatics and ICT}, journal = {KEYCIT 2014 - Key Competencies in Informatics and ICT}, number = {7}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, issn = {1868-0844}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-82913}, pages = {363 -- 370}, year = {2015}, abstract = {A project involving the composition of a number of pieces of music by public participants revealed levels of engagement with and mastery of complex music technologies by a number of secondary student volunteers. This paper reports briefly on some initial findings of that project and seeks to illuminate an understanding of computational thinking across the curriculum.}, language = {en} } @article{Respondek2014, author = {Respondek, Tobias}, title = {A workflow for computing potential areas for wind turbines}, series = {Process design for natural scientists: an agile model-driven approach}, journal = {Process design for natural scientists: an agile model-driven approach}, number = {500}, publisher = {Springer}, address = {Berlin}, isbn = {978-3-662-45005-5}, pages = {200 -- 215}, year = {2014}, abstract = {This paper describes the implementation of a workflow model for service-oriented computing of potential areas for wind turbines in jABC. By implementing a re-executable model, the manual effort of a multi-criteria site analysis can be reduced. The aim is to determine the shift of typical geoprocessing tools of geographic information systems (GIS) from the desktop to the web. The analysis is based on a vector data set and mainly uses web services of the "Center for Spatial Information Science and Systems" (CSISS).
This paper discusses the effort, benefits, and problems associated with the use of the web services.}, language = {en} } @article{Reso2014, author = {Reso, Judith}, title = {Protein Classification Workflow}, series = {Process Design for Natural Scientists: an agile model-driven approach}, journal = {Process Design for Natural Scientists: an agile model-driven approach}, number = {500}, editor = {Lambrecht, Anna-Lena and Margaria, Tiziana}, publisher = {Springer Verlag}, address = {Berlin}, isbn = {978-3-662-45005-5}, issn = {1865-0929}, pages = {65 -- 72}, year = {2014}, abstract = {The protein classification workflow described in this report enables users to get information about a novel protein sequence automatically. The information is derived by different bioinformatic analysis tools which calculate or predict features of a protein sequence. Also, databases are used to compare the novel sequence with known proteins.}, language = {en} } @book{ReschkeTaeumelPapeetal.2018, author = {Reschke, Jakob and Taeumel, Marcel and Pape, Tobias and Niephaus, Fabio and Hirschfeld, Robert}, title = {Towards version control in object-based systems}, volume = {121}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-430-2}, issn = {1613-5652}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-410812}, publisher = {Universit{\"a}t Potsdam}, pages = {100}, year = {2018}, abstract = {Version control is a widely used practice among software developers. It reduces the risk of changing their software and allows them to manage different configurations and to collaborate with others more efficiently. This is amplified by code sharing platforms such as GitHub or Bitbucket. Most version control systems track files (e.g., Git, Mercurial, and Subversion do), but some programming environments do not operate on files, but on objects instead (many Smalltalk implementations do). Users of such environments want to use version control for their objects anyway. Specialized version control systems, such as the ones available for Smalltalk systems (e.g., ENVY/Developer and Monticello), focus on a small subset of objects that can be versioned. Most of these systems concentrate on the tracking of methods, classes, and configurations thereof. Other user-defined and user-built objects are either not eligible for version control at all, tracking them involves complicated workarounds, or a fixed, domain-unspecific serialization format is used that does not equally suit all kinds of objects. Moreover, these version control systems that are specific to a programming environment require their own code sharing platforms; popular, well-established platforms for file-based version control systems cannot be used, or adapter solutions need to be implemented and maintained. To improve the situation for version control of arbitrary objects, a framework for tracking, converting, and storing objects is presented in this report. It allows editions of objects to be stored in an exchangeable, existing backend version control system. The platforms of the backend version control system can thus be reused. Users and objects have control over how objects are captured for the purpose of version control. Domain-specific requirements can be implemented. The storage format (i.e., the file format when file-based backend version control systems are used) can also vary from one object to another. Different editions of objects can be compared and sets of changes can be applied to graphs of objects.
A generic way of capturing and restoring objects that supports most kinds of objects is described. It models each object as a collection of slots. Thus, users can begin to track their objects without first having to implement version control supplements for their own kinds of objects. The proposed architecture is evaluated using a prototype implementation that can be used to track objects in Squeak/Smalltalk with Git. The prototype improves the previously described suboptimal standing of user objects with respect to version control and also simplifies some version control tasks for classes and methods. It also raises new problems, which are discussed in this report as well.}, language = {en} }
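A minimal Python sketch of the slot-based capture idea described in the report above: an object is captured as a mapping from slot names to recursively captured values, which an exchangeable backend version control system could then serialize. The function name and the dictionary format are illustrative assumptions; the report's implementation targets Squeak/Smalltalk, not Python.

# Sketch: capture an object as a collection of named slots (cycles and
# special object kinds are omitted for brevity).
def capture(obj):
    """Return an edition of obj: primitives as-is, objects as named slots."""
    if isinstance(obj, (int, float, str, bool)) or obj is None:
        return obj
    if isinstance(obj, (list, tuple)):
        return [capture(v) for v in obj]
    return {"__class__": type(obj).__name__,
            "slots": {name: capture(value)
                      for name, value in vars(obj).items()}}

class Point:
    def __init__(self, x, y):
        self.x, self.y = x, y

print(capture(Point(3, 4)))
# {'__class__': 'Point', 'slots': {'x': 3, 'y': 4}}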
@misc{RepsilberKernTelaaretal.2010, author = {Repsilber, Dirk and Kern, Sabine and Telaar, Anna and Walzl, Gerhard and Black, Gillian F. and Selbig, Joachim and Parida, Shreemanta K. and Kaufmann, Stefan H. E. and Jacobsen, Marc}, title = {Biomarker discovery in heterogeneous tissue samples}, series = {Postprints der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe}, journal = {Postprints der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe}, number = {854}, issn = {1866-8372}, doi = {10.25932/publishup-42934}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-429343}, pages = {17}, year = {2010}, abstract = {Background: For heterogeneous tissues, such as blood, measurements of gene expression are confounded by the relative proportions of the cell types involved. Conclusions have to rely on estimation of gene expression signals for homogeneous cell populations, e.g., by applying micro-dissection, fluorescence-activated cell sorting, or in-silico deconfounding. We studied the feasibility and validity of a non-negative matrix decomposition algorithm using experimental gene expression data for blood and sorted cells from the same donor samples. Our objective was to optimize the algorithm regarding the detection of differentially expressed genes and to enable its use for classification in the difficult scenario of reversely regulated genes. This would be of importance for the identification of candidate biomarkers in heterogeneous tissues. Results: Experimental data and simulation studies involving noise parameters estimated from these data revealed that, for valid detection of differential gene expression, quantile normalization and the use of non-log data are optimal. We demonstrate the feasibility of predicting the proportions of constituting cell types from gene expression data of single samples, as a prerequisite for a deconfounding-based classification approach. Classification cross-validation errors with and without the use of deconfounding results, as well as sample-size dependencies, are reported. The implementation of the algorithm and the simulation and analysis scripts are available. Conclusions: The deconfounding algorithm without decorrelation, using quantile normalization on non-log data, is proposed for biomarkers that are difficult to detect, and for cases where confounding by varying proportions of cell types is the suspected reason. In this case, a deconfounding ranking approach can be used as a powerful alternative to, or complement of, other statistical learning approaches to define candidate biomarkers for molecular diagnosis and prediction in biomedicine, in realistically noisy conditions and with moderate sample sizes.}, language = {en} }
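The non-negative matrix decomposition underlying the deconfounding study above can be sketched in a few lines of Python; the toy data, the choice of scikit-learn's NMF, and the number of cell types are assumptions for illustration, not the authors' implementation (which additionally relies on quantile normalization of non-log data, as the abstract notes).

# Sketch: estimate cell-type proportions from heterogeneous samples by
# factoring expression as X ~ W @ H (signatures x mixing weights).
import numpy as np
from sklearn.decomposition import NMF

rng = np.random.default_rng(0)
X = rng.gamma(shape=2.0, scale=50.0, size=(200, 30))  # genes x samples, non-log

model = NMF(n_components=3, init="nndsvda", max_iter=500, random_state=0)
W = model.fit_transform(X)   # (200, 3): cell-type-specific signatures
H = model.components_        # (3, 30): per-sample mixing weights

proportions = H / H.sum(axis=0, keepdims=True)  # columns sum to one
print(proportions[:, 0])     # estimated composition of the first sample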
@masterthesis{Repp2023, type = {Bachelor Thesis}, author = {Repp, Leo}, title = {Extending the automatic theorem prover nanoCoP with arithmetic procedures}, doi = {10.25932/publishup-57619}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-576195}, school = {Universit{\"a}t Potsdam}, pages = {52}, year = {2023}, abstract = {In this bachelor's thesis, I implement the automatic theorem prover nanoCoP-Ω. This new system is the result of porting the arithmetic-handling procedures of the automatic theorem prover with arithmetic leanCoP-Ω to the system nanoCoP 2.0. First, the mathematical background on automatic theorem proving and arithmetic is provided. I introduce the predecessor projects leanCoP, nanoCoP, and leanCoP-Ω, on which nanoCoP-Ω is based. This is followed by a detailed explanation of the concepts by which the non-clausal connection calculus has to be extended in order to integrate the treatment of arithmetic expressions and equalities into the calculus, as well as a description of how these concepts are implemented in nanoCoP-Ω. Finally, an experimental evaluation of nanoCoP-Ω is presented: a detailed comparison of runtime and the number of solved problems with the similarly structured theorem prover leanCoP-Ω, based on the TPTP benchmark. I conclude that nanoCoP-Ω is considerably faster than leanCoP-Ω, but less well suited for larger problems. Moreover, I found that nanoCoP-Ω can produce incorrect proofs. I discuss how this problem can be solved, as well as some possible optimizations and extensions of the proof system.}, language = {en} } @article{ReffayMiledOrtizetal.2013, author = {Reffay, Christophe and Miled, Mahdi and Ortiz, Pascal and F{\'e}vrier, Loic}, title = {An epistemic hypermedia to learn python as a resource for an introductory course for algorithmic in France}, series = {Commentarii informaticae didacticae : (CID)}, journal = {Commentarii informaticae didacticae : (CID)}, number = {6}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, issn = {1868-0844}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-64545}, pages = {111 -- 118}, year = {2013}, abstract = {We launched an original large-scale experiment concerning informatics learning in French high schools. We are using the France-IOI platform to federate resources and share observations for research. The first step is the implementation of an adaptive hypermedia based on very fine-grained epistemic modules for learning Python programming. We define the traces that need to be collected in order to study the navigation trajectories pupils draw across this hypermedia. It may be browsed by pupils either as a course support or as extra help to solve the list of exercises (mainly for discovering algorithmics). By leaving the locus of control to the learner, we want to observe the different trajectories they finally draw through our system. These trajectories may be abstracted and interpreted as strategies and then compared for their relative efficiency. Our hypothesis is that learners have different profiles and may use the appropriate strategy accordingly. This paper presents the research questions, the method, and the expected results.}, language = {en} } @book{RanaMohapatraSidorovaetal.2022, author = {Rana, Kaushik and Mohapatra, Durga Prasad and Sidorova, Julia and Lundberg, Lars and Sk{\"o}ld, Lars and Lopes Grim, Lu{\'i}s Fernando and Sampaio Gradvohl, Andr{\'e} Leon and Cremerius, Jonas and Siegert, Simon and Weltzien, Anton von and Baldi, Annika and Klessascheck, Finn and Kalancha, Svitlana and Lichtenstein, Tom and Shaabani, Nuhad and Meinel, Christoph and Friedrich, Tobias and Lenzner, Pascal and Schumann, David and Wiese, Ingmar and Sarna, Nicole and Wiese, Lena and Tashkandi, Araek Sami and van der Walt, Est{\'e}e and Eloff, Jan H. P. and Schmidt, Christopher and H{\"u}gle, Johannes and Horschig, Siegfried and Uflacker, Matthias and Najafi, Pejman and Sapegin, Andrey and Cheng, Feng and Stojanovic, Dragan and Stojnev Ilić, Aleksandra and Djordjevic, Igor and Stojanovic, Natalija and Predic, Bratislav and Gonz{\'a}lez-Jim{\'e}nez, Mario and de Lara, Juan and Mischkewitz, Sven and Kainz, Bernhard and van Hoorn, Andr{\'e} and Ferme, Vincenzo and Schulz, Henning and Knigge, Marlene and Hecht, Sonja and Prifti, Loina and Krcmar, Helmut and Fabian, Benjamin and Ermakova, Tatiana and Kelkel, Stefan and Baumann, Annika and Morgenstern, Laura and Plauth, Max and Eberhard, Felix and Wolff, Felix and Polze, Andreas and Cech, Tim and Danz, Noel and Noack, Nele Sina and Pirl, Lukas and Beilharz, Jossekin Jakob and De Oliveira, Roberto C. L. and Soares, F{\'a}bio Mendes and Juiz, Carlos and Bermejo, Belen and M{\"u}hle, Alexander and Gr{\"u}ner, Andreas and Saxena, Vageesh and Gayvoronskaya, Tatiana and Weyand, Christopher and Krause, Mirko and Frank, Markus and Bischoff, Sebastian and Behrens, Freya and R{\"u}ckin, Julius and Ziegler, Adrian and Vogel, Thomas and Tran, Chinh and Moser, Irene and Grunske, Lars and Sz{\'a}rnyas, G{\'a}bor and Marton, J{\'o}zsef and Maginecz, J{\'a}nos and Varr{\'o}, D{\'a}niel and Antal, J{\'a}nos Benjamin}, title = {HPI Future SOC Lab - Proceedings 2018}, number = {151}, editor = {Meinel, Christoph and Polze, Andreas and Beins, Karsten and Strotmann, Rolf and Seibold, Ulrich and R{\"o}dszus, Kurt and M{\"u}ller, J{\"u}rgen}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-547-7}, issn = {1613-5652}, doi = {10.25932/publishup-56371}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-563712}, publisher = {Universit{\"a}t Potsdam}, pages = {x, 277}, year = {2022}, abstract = {The "HPI Future SOC Lab" is a cooperation of the Hasso Plattner Institute (HPI) and industry partners. Its mission is to enable and promote exchange and interaction between the research community and the industry partners. The HPI Future SOC Lab provides researchers with free-of-charge access to a complete infrastructure of state-of-the-art hardware and software. This infrastructure includes components which might be too expensive for an ordinary research environment, such as servers with up to 64 cores and 2 TB main memory. The offerings address researchers particularly, but not exclusively, from the areas of computer science and business information systems. Main areas of research include cloud computing, parallelization, and in-memory technologies. This technical report presents results of research projects executed in 2018.
Selected projects have presented their results on April 17th and November 14th, 2017 at the Future SOC Lab Day events.}, language = {en} } @article{Ragonis2013, author = {Ragonis, Noa}, title = {Problem-solving strategies must be taught implicitly}, series = {Commentarii informaticae didacticae : (CID)}, journal = {Commentarii informaticae didacticae : (CID)}, number = {6}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, issn = {1868-0844}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-64645}, pages = {155 -- 157}, year = {2013}, abstract = {Problem solving is one of the central activities performed by computer scientists as well as by computer science learners. Whereas the teaching of algorithms and programming languages is usually well structured within a curriculum, the development of learners' problem-solving skills is largely implicit and less structured. Students at all levels often face difficulties in problem analysis and solution construction. The basic assumption of the workshop is that without some formal instruction on effective strategies, even the most inventive learner may resort to unproductive trial-and-error problem-solving processes. Hence, it is important to teach problem-solving strategies and to guide teachers on how to teach their pupils this cognitive tool. Computer science educators should be aware of the difficulties and acquire appropriate pedagogical tools to help their learners gain and experience problem-solving skills.}, language = {en} } @article{QuinzanGoebelWagneretal.2021, author = {Quinzan, Francesco and G{\"o}bel, Andreas and Wagner, Markus and Friedrich, Tobias}, title = {Evolutionary algorithms and submodular functions}, series = {Natural computing : an innovative journal bridging biosciences and computer sciences ; an international journal}, volume = {20}, journal = {Natural computing : an innovative journal bridging biosciences and computer sciences ; an international journal}, number = {3}, publisher = {Springer Science + Business Media B.V.}, address = {Dordrecht}, issn = {1572-9796}, doi = {10.1007/s11047-021-09841-7}, pages = {561 -- 575}, year = {2021}, abstract = {A core operator of evolutionary algorithms (EAs) is the mutation. Recently, much attention has been devoted to the study of mutation operators with dynamic and non-uniform mutation rates. Following up on this area of work, we propose a new mutation operator and analyze its performance on the (1 + 1) Evolutionary Algorithm (EA). Our analyses show that this mutation operator competes with pre-existing ones when used by the (1 + 1) EA on classes of problems for which results on the other mutation operators are available. We show that the (1 + 1) EA using our mutation operator achieves a (1/3)-approximation ratio on any non-negative submodular function in polynomial time. We also consider the problem of maximizing a symmetric submodular function under a single matroid constraint and show that the (1 + 1) EA using our operator finds a (1/3)-approximation within polynomial time. This performance matches that of combinatorial local search algorithms specifically designed to solve these problems and outperforms them with constant probability.
Finally, we evaluate the performance of the (1 + 1) EA using our operator experimentally by considering two applications: (a) the maximum directed cut problem on real-world graphs of different origins, with up to 6.6 million vertices and 56 million edges, and (b) the symmetric mutual information problem using a four-month air pollution data set. In comparison with uniform mutation and a recently proposed dynamic scheme, our operator comes out on top on these instances.}, language = {en} }
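For reference, the baseline that the entry above compares against, the (1 + 1) EA with standard uniform mutation (flip each bit independently with probability 1/n), fits in a few lines of Python; the paper's proposed operator is non-uniform and is not reproduced here.

# Sketch of the (1+1) EA with standard uniform bit-flip mutation.
import random

def one_plus_one_ea(fitness, n, generations=10_000, seed=0):
    rng = random.Random(seed)
    parent = [rng.randint(0, 1) for _ in range(n)]
    best = fitness(parent)
    for _ in range(generations):
        child = [1 - b if rng.random() < 1 / n else b for b in parent]
        f = fitness(child)
        if f >= best:  # elitist acceptance, ties included
            parent, best = child, f
    return parent, best

# Usage on OneMax (maximize the number of ones):
x, fx = one_plus_one_ea(sum, n=50)
print(fx)  # typically 50 after enough generations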
@article{PuriVardeMelo2023, author = {Puri, Manish and Varde, Aparna S. and Melo, Gerard de}, title = {Commonsense based text mining on urban policy}, series = {Language resources and evaluation}, volume = {57}, journal = {Language resources and evaluation}, publisher = {Springer}, address = {Dordrecht [u.a.]}, issn = {1574-020X}, doi = {10.1007/s10579-022-09584-6}, pages = {733 -- 763}, year = {2023}, abstract = {Local laws on urban policy, i.e., ordinances, directly affect our daily lives in various ways (health, business, etc.), yet in practice they remain opaque and complex for many citizens. This article focuses on an approach to make urban policy more accessible and comprehensible to the general public and to government officials, while also addressing pertinent social media postings. Due to the intricacies of natural language, ranging from complex legalese in ordinances to informal lingo in tweets, it is practical to harness human judgment here. To this end, we mine ordinances and tweets via reasoning based on commonsense knowledge so as to better account for pragmatics and semantics in the text. Ours is pioneering work in ordinance mining, and thus there is no prior labeled training data available for learning. This gap is filled by commonsense knowledge, a prudent choice in situations involving a lack of adequate training data. The ordinance mining can be beneficial to the public in fathoming policies and to officials in assessing policy effectiveness based on public reactions. This work contributes to smart governance, leveraging transparency in governing processes via public involvement. We focus significantly on ordinances contributing to smart cities; hence, an important goal is to assess how well an urban region is heading towards becoming a smart city, based on the mapping of its policies to smart city characteristics and on the corresponding public satisfaction.}, language = {en} } @misc{PulkkinenMetzler2015, author = {Pulkkinen, Otto and Metzler, Ralf}, title = {Variance-corrected Michaelis-Menten equation predicts transient rates of single-enzyme reactions and response times in bacterial gene-regulation}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-86632}, year = {2015}, abstract = {Many chemical reactions in biological cells occur at very low concentrations of constituent molecules. Thus, transcriptional gene regulation is often controlled by poorly expressed transcription factors, such as the E. coli lac repressor, which is present in only a few tens of copies. Here we study the effects of inherent concentration fluctuations of substrate molecules on the seminal Michaelis-Menten scheme of biochemical reactions. We present a universal correction to the Michaelis-Menten equation for the reaction rates. The relevance and validity of this correction for enzymatic reactions and intracellular gene regulation are demonstrated. Our analytical theory and simulation results confirm that the proposed variance-corrected Michaelis-Menten equation predicts the rate of reactions with remarkable accuracy even in the presence of large non-equilibrium concentration fluctuations. The major advantage of our approach is that it involves only the mean and variance of the substrate-molecule concentration. Our theory is therefore accessible to experiments and not specific to the exact source of the concentration fluctuations.}, language = {en} }
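To make concrete where a variance correction to the Michaelis-Menten rate can enter, assume the classical hyperbolic rate law and moderately sized fluctuations; a second-order Taylor expansion around the mean substrate concentration then yields a generic correction that, as in the entry above, involves only the mean and the variance. This expansion is shown for illustration under these stated assumptions and is not the universal correction derived by the authors:

\[
v(s) = \frac{v_{\max}\, s}{K_M + s}, \qquad
\langle v(S) \rangle \approx v(\langle S \rangle) + \tfrac{1}{2}\, v''(\langle S \rangle)\,\mathrm{Var}(S)
= \frac{v_{\max}\langle S \rangle}{K_M + \langle S \rangle} - \frac{v_{\max} K_M\, \mathrm{Var}(S)}{\left(K_M + \langle S \rangle\right)^{3}}.
\]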
Geodesic-distance-based quantities seem to be well suited to robustly identify one- and two-dimensional skeletons. Chapter 6 applies the method to the visualization of bone micro-architecture. Chapter 3 describes a novel geometry generation scheme for representing voxel skeletons, which retracts voxel skeletons to piecewise linear geometry per dual cube. The generated triangle meshes and graphs provide a link to geometry processing and efficient rendering of voxel skeletons. The scheme creates non-closed surfaces with boundaries, which contain fewer triangles than a representation of voxel skeletons using closed surfaces like small cubes or iso-surfaces. A conclusion is that thinking specifically about voxel skeleton configurations instead of generic voxel configurations helps to deal with the topological implications. The geometry generation is one foundation of the applications presented in Chapter 6. Chapter 5 presents a novel external memory algorithm for distance-ordered homotopic thinning. The presented method extends known algorithms for computing chamfer distance transformations and thinning to execute I/O-efficiently when the input is larger than the available main memory. The applied block-wise decomposition schemes are quite simple. Yet it was necessary to carefully analyze the effects of block boundaries to devise globally correct external memory variants of known algorithms. In general, doing so is superior to naive block-wise processing that ignores boundary effects. Chapter 6 applies the algorithms in a novel method based on confocal microscopy for the quantitative study of micro-vascular networks in the field of microcirculation.}, language = {en} } @article{PrestonYounie2015, author = {Preston, Christina and Younie, Sarah}, title = {Mentoring in a Digital World}, series = {KEYCIT 2014 - Key Competencies in Informatics and ICT}, journal = {KEYCIT 2014 - Key Competencies in Informatics and ICT}, number = {7}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, issn = {1868-0844}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-82895}, pages = {343 -- 350}, year = {2015}, abstract = {This paper focuses on the results of the evaluation of the first pilot of an e-mentoring unit designed by the Hands-On ICT consortium, funded by the EU LLL programme. The overall aim of this two-year activity is to investigate the value for professional learning of Massive Open Online Courses (MOOCs) and Community Open Online Courses (COOCs) in the context of a 'community of practice'. Three units in the first pilot covered aspects of using digital technologies to develop creative thinking skills. The findings in this paper relate to the fourth unit about e-mentoring, a skill that was important to delivering the course content in the other three units. Findings about the e-mentoring unit included the students' request for detailed profiles so that participants can get to know each other, and the need to reconcile the different interpretations of e-mentoring held by the participants when the course begins. The evaluators concluded that the major issues were that not all professional learners would self-organise and network, and that few would wish to mentor their colleagues voluntarily.
Therefore, the e-mentoring issues will need careful consideration in pilots two and three to identify how e-mentoring will be organised.}, language = {en} } @unpublished{PrasseGrubenMachlikaetal.2016, author = {Prasse, Paul and Gruben, Gerrit and Machlika, Lukas and Pevny, Tomas and Sofka, Michal and Scheffer, Tobias}, title = {Malware Detection by HTTPS Traffic Analysis}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-100942}, pages = {10}, year = {2016}, abstract = {In order to evade detection by network-traffic analysis, a growing proportion of malware uses the encrypted HTTPS protocol. We explore the problem of detecting malware on client computers based on HTTPS traffic analysis. In this setting, malware has to be detected based on the host IP address, ports, timestamp, and data volume information of TCP/IP packets that are sent and received by all the applications on the client. We develop a scalable protocol that allows us to collect network flows of known malicious and benign applications as training data and derive a malware-detection method based on neural networks and sequence classification. We study the method's ability to detect known and new, unknown malware in a large-scale empirical study.}, language = {en} } @phdthesis{Prasse2016, author = {Prasse, Paul}, title = {Pattern recognition for computer security}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-100251}, school = {Universit{\"a}t Potsdam}, pages = {VI, 75}, year = {2016}, abstract = {Computer Security deals with the detection and mitigation of threats to computer networks, data, and computing hardware. This thesis addresses the following two computer security problems: email spam campaign detection and malware detection. Email spam campaigns can easily be generated using popular dissemination tools by specifying simple grammars that serve as message templates. A grammar is disseminated to the nodes of a botnet, and the nodes create messages by instantiating the grammar at random. Email spam campaigns can encompass huge data volumes and therefore pose a threat to the stability of the infrastructure of email service providers that have to store them. Malware (software that serves a malicious purpose) affects web servers, client computers via active content, and client computers through executable files. Without the help of malware detection systems it would be easy for malware creators to collect sensitive information or to infiltrate computers. The detection of threats (such as email-spam messages, phishing messages, or malware) is an adversarial and therefore intrinsically difficult problem. Threats vary greatly and evolve over time. The detection of threats based on manually designed rules is therefore difficult and requires a constant engineering effort. Machine learning is a research area that revolves around the analysis of data and the discovery of patterns that describe aspects of the data. Discriminative learning methods extract prediction models from data that are optimized to predict a target attribute as accurately as possible. Machine-learning methods hold the promise of automatically identifying patterns that robustly and accurately detect threats. This thesis focuses on the design and analysis of discriminative learning methods for the two computer-security problems under investigation: email-campaign and malware detection. The first part of this thesis addresses email-campaign detection.
We focus on regular expressions as a syntactic framework, because regular expressions are intuitively comprehensible by security engineers and administrators, and they can be applied as a detection mechanism in an extremely efficient manner. In this setting, a prediction model is provided with exemplary messages from an email-spam campaign. The prediction model has to generate a regular expression that reveals the syntactic pattern underlying the entire campaign, and that a security engineer finds comprehensible and trusts enough to use for blacklisting further messages at the email server. We model this problem as a two-stage learning problem with structured input and output spaces that can be solved using standard cutting-plane methods. To this end, we develop an appropriate loss function and derive a decoder for the resulting optimization problem. The second part of this thesis deals with the problem of predicting whether a given JavaScript or PHP file is malicious or benign. Recent malware analysis techniques use static or dynamic features, or both. In fully dynamic analysis, the software or script is executed and observed for malicious behavior in a sandbox environment. By contrast, static analysis is based on features that can be extracted directly from the program file. In order to bypass static detection mechanisms, code obfuscation techniques are used to spread a malicious program file in many different syntactic variants. Deobfuscating the code before applying a static classifier can overcome the problem of obfuscated malicious code, but it increases the computational costs of malware detection by an order of magnitude. In this thesis we present a cascaded architecture in which a classifier first performs a static analysis of the original code and, based on the outcome of this first classification step, the code may be deobfuscated and classified again. We explore several types of features, including token \$n\$-grams, orthogonal sparse bigrams, subroutine hashings, and syntax-tree features, and study the robustness of detection methods and feature types against the evolution of malware over time. The developed tool scans very large file collections quickly and accurately. Each model is evaluated on real-world data and compared to reference methods. Our approach of inferring regular expressions to filter emails belonging to an email spam campaign leads to models with a high true-positive rate at a very low false-positive rate that is an order of magnitude lower than that of a commercial content-based filter. Our system, REx-SVMshort, is being used by a commercial email service provider and complements content-based and IP-address-based filtering. Our cascaded malware detection system is evaluated on a high-quality data set of almost 400,000 conspicuous PHP files and a collection of more than 100,000 JavaScript files.
From our case study we can conclude that our system can quickly and accurately process large data collections at a low false-positive rate.}, language = {en} } @article{PousttchiGleiss2019, author = {Pousttchi, Key and Gleiß, Alexander}, title = {Surrounded by middlemen - how multi-sided platforms change the insurance industry}, series = {Electron Markets}, volume = {29}, journal = {Electron Markets}, number = {4}, publisher = {Springer}, address = {Heidelberg}, issn = {1019-6781}, doi = {10.1007/s12525-019-00363-w}, pages = {609 -- 629}, year = {2019}, abstract = {Multi-sided platforms (MSP) strongly affect markets and play a crucial part within the digital and networked economy. Although empirical evidence indicates their occurrence in many industries, research has not investigated the game-changing impact of MSP on traditional markets to a sufficient extent. More specifically, we have little knowledge of how MSP affect value creation and customer interaction in entire markets, exploiting the potential of digital technologies to offer new value propositions. Our paper addresses this research gap and provides an initial systematic approach to analyze the impact of MSP on the insurance industry. For this purpose, we analyze the state of the art in research and practice in order to develop a reference model of the value network for the insurance industry. On this basis, we conduct a case-study analysis to discover and analyze roles which are occupied or even newly created by MSP. As a final step, we categorize MSP with regard to their relation to traditional insurance companies, resulting in a classification scheme with four MSP standard types: Competition, Coordination, Cooperation, Collaboration.}, language = {en} } @article{PonceSrinathAllegue2021, author = {Ponce, Eva and Srinath, Sindhu and Allegue, Laura}, title = {Integrating Community Teaching in MOOCs}, series = {EMOOCs 2021}, volume = {2021}, journal = {EMOOCs 2021}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-512-5}, doi = {10.25932/publishup-51712}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-517123}, pages = {95 -- 109}, year = {2021}, abstract = {The MITx MicroMasters Program in Supply Chain Management (SCM) is a Massive Open Online Course (MOOC)-based program that aims to impart quantitative and qualitative knowledge to SCM enthusiasts all around the world. The program, which started in 2014 with just one course, now offers five courses and one final proctored exam, which allows a learner to gain a MicroMasters credential upon completion. While the courses are delivered in the form of pre-recorded videos by the faculty members of Massachusetts Institute of Technology (MIT), the questions and comments posted by learners in discussion forums are addressed by a group of Community Teaching Assistants (CTAs) who volunteer for this role. The MITx staff carefully selects CTAs for each run of the individual courses as they take on a co-facilitator's role in the program. This paper highlights the importance of community teaching, discusses the profile of CTAs involved with the program, and describes their recruitment, training, tasks and responsibilities, engagement, and the rewarding process.
In the end, we also share a few recommendations based on the lessons learned from community teaching during the last five years of running more than 45 MOOC courses that could help other MOOC teams deliver a high-touch experience.}, language = {en} } @book{PolyvyanyySmirnovWeske2008, author = {Polyvyanyy, Artem and Smirnov, Sergey and Weske, Mathias}, title = {Reducing the complexity of large EPCs}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-32959}, publisher = {Universit{\"a}t Potsdam}, year = {2008}, abstract = {Contents: 1 Introduction 2 Motivation and Goal 3 Fundamentals 4 Elementary Abstractions 5 Real World Example 6 Conclusions}, language = {en} } @book{PolyvyanyySmirnovWeske2008, author = {Polyvyanyy, Artem and Smirnov, Sergey and Weske, Mathias}, title = {The triconnected abstraction of process models}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-940793-65-2}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-32847}, publisher = {Universit{\"a}t Potsdam}, pages = {17}, year = {2008}, abstract = {Contents: Artem Polyvyanyy, Sergey Smirnov, and Mathias Weske The Triconnected Abstraction of Process Models 1 Introduction 2 Business Process Model Abstraction 3 Preliminaries 4 Triconnected Decomposition 4.1 Basic Approach for Process Component Discovery 4.2 SPQR-Tree Decomposition 4.3 SPQR-Tree Fragments in the Context of Process Models 5 Triconnected Abstraction 5.1 Abstraction Rules 5.2 Abstraction Algorithm 6 Related Work and Conclusions}, language = {en} } @book{PolyvyanyyKuropka2007, author = {Polyvyanyy, Artem and Kuropka, Dominik}, title = {A quantitative evaluation of the enhanced topic-based vector space model}, isbn = {978-3-939469-95-7}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-33816}, publisher = {Universit{\"a}t Potsdam}, year = {2007}, abstract = {This contribution presents a quantitative evaluation procedure for Information Retrieval models and the results of this procedure applied to the enhanced Topic-based Vector Space Model (eTVSM). Since the eTVSM is an ontology-based model, its effectiveness heavily depends on the quality of the underlying ontology. Therefore, the model has been tested with different ontologies to evaluate the impact of those ontologies on the effectiveness of the eTVSM. On the highest level of abstraction, the following results have been observed during our evaluation: First, we confirmed the theoretically deduced statement that the eTVSM achieves an effectiveness similar to that of the classic Vector Space Model if a trivial ontology (every term is a concept, independent of all other concepts) is used. Second, we were able to show that the effectiveness of the eTVSM increases if an ontology is used that is only able to resolve synonyms. We were able to derive such an ontology automatically from the WordNet ontology. Third, we observed that more powerful ontologies automatically derived from WordNet dramatically dropped the effectiveness of the eTVSM, even clearly below the effectiveness level of the Vector Space Model.
Fourth, we were able to show that a manually created and optimized ontology is able to raise the effectiveness of the eTVSM to a level clearly above the best effectiveness levels we have found in the literature for the Latent Semantic Indexing model on comparable document sets.}, language = {en} } @phdthesis{Polyvyanyy2012, author = {Polyvyanyy, Artem}, title = {Structuring process models}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-59024}, school = {Universit{\"a}t Potsdam}, year = {2012}, abstract = {One can fairly adopt the ideas of Donald E. Knuth to conclude that process modeling is both a science and an art. Process modeling does have an aesthetic sense. Similar to composing an opera or writing a novel, process modeling is carried out by humans who undergo creative practices when engineering a process model. Therefore, the very same process can be modeled in a myriad of ways. Once modeled, processes can be analyzed by employing scientific methods. Usually, process models are formalized as directed graphs, with nodes representing tasks and decisions, and directed arcs describing temporal constraints between the nodes. Common process definition languages, such as Business Process Model and Notation (BPMN) and Event-driven Process Chain (EPC), allow process analysts to define models with arbitrarily complex topologies. The absence of structural constraints supports creativity and productivity, as there is no need to force ideas into a limited set of available structural patterns. Nevertheless, it is often preferable that models follow certain structural rules. A well-known structural property of process models is (well-)structuredness. A process model is (well-)structured if and only if every node with multiple outgoing arcs (a split) has a corresponding node with multiple incoming arcs (a join), and vice versa, such that the set of nodes between the split and the join induces a single-entry-single-exit (SESE) region; otherwise the process model is unstructured. The motivations for well-structured process models are manifold: (i) Well-structured process models are easier to lay out for visual representation as their formalizations are planar graphs. (ii) Well-structured process models are easier to comprehend by humans. (iii) Well-structured process models tend to have fewer errors than unstructured ones, and it is less probable to introduce new errors when modifying a well-structured process model. (iv) Well-structured process models are better suited for analysis, as many existing formal techniques are applicable only to well-structured process models. (v) Well-structured process models are better suited for efficient execution and optimization, e.g., when discovering independent regions of a process model that can be executed concurrently. Consequently, there are process modeling languages that encourage well-structured modeling, e.g., Business Process Execution Language (BPEL) and ADEPT. However, well-structured process modeling implies some limitations: (i) There exist processes that cannot be formalized as well-structured process models. (ii) There exist processes that, when formalized as well-structured process models, require a considerable duplication of modeling constructs. Rather than expecting well-structured modeling from the start, we advocate for the absence of structural constraints when modeling.
Afterwards, automated methods can suggest, upon request and whenever possible, alternative formalizations that are "better" structured, preferably well-structured. In this thesis, we study the problem of automatically transforming process models into equivalent well-structured models. The developed transformations are performed under a strong notion of behavioral equivalence which preserves concurrency. The findings are implemented in a tool, which is publicly available.}, language = {en} } @article{PoceReValente2021, author = {Poce, Antonella and Re, Maria Rosaria and Valente, Mara}, title = {Evaluating OERs in Museum Education Context}, series = {EMOOCs 2021}, volume = {2021}, journal = {EMOOCs 2021}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-512-5}, doi = {10.25932/publishup-51717}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-517178}, pages = {159 -- 168}, year = {2021}, abstract = {This paper aims to present the results of a higher education experience promoted by the research centres INTELLECT (University of Modena and Reggio Emilia) and CDM (University of Roma Tre), as part of different master's degree programmes in the academic years 2018/2019, 2019/2020, and 2020/2021. Through different online activities, 37 students attended and evaluated a MOOC on museum education content, thus promoting their professional and transversal skills, such as critical thinking, and developing their knowledge of OERs within culture and heritage education contexts. Moreover, results from the online evaluation activities supported the implementation of the MOOC in a collaborative way: during the academic years, evaluation data have been used by the researchers to make changes to the course modules, thus realizing a more effective online path from an educational point of view.}, language = {en} } @phdthesis{Plauth2022, author = {Plauth, Max Frederik}, title = {Improving the Accessibility of Heterogeneous System Resources for Application Developers using Programming Abstractions}, doi = {10.25932/publishup-55811}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-558118}, school = {Universit{\"a}t Potsdam}, pages = {ix, 133}, year = {2022}, abstract = {The heterogeneity of today's state-of-the-art computer architectures is confronting application developers with an immense degree of complexity which results from two major challenges. First, developers need to acquire profound knowledge about the programming models or the interaction models associated with each type of heterogeneous system resource to make efficient use thereof. Second, developers must take into account that heterogeneous system resources always need to exchange data with each other in order to work on a problem together. However, this data exchange is always associated with a certain amount of overhead, which is why the amounts of data exchanged should be kept as low as possible. This thesis proposes three programming abstractions to lessen the burdens imposed by these major challenges with the goal of making heterogeneous system resources accessible to a wider range of application developers. The lib842 compression library provides the first method for accessing the compression and decompression facilities of the NX-842 on-chip compression accelerator available in IBM Power CPUs from user space applications running on Linux.
Addressing application development of scale-out GPU workloads, the CloudCL framework makes the resources of GPU clusters more accessible by hiding many aspects of distributed computing while enabling application developers to focus on the aspects of the data parallel programming model associated with GPUs. Furthermore, CloudCL is augmented with transparent data compression facilities based on the lib842 library in order to improve the efficiency of data transfers among cluster nodes. The improved data transfer efficiency provided by the integration of transparent data compression yields performance improvements ranging between 1.11x and 2.07x across four data-intensive scale-out GPU workloads. To investigate the impact of programming abstractions for data placement in NUMA systems, a comprehensive evaluation of the PGASUS framework for NUMA-aware C++ application development is conducted. On a wide range of test systems, the evaluation demonstrates that PGASUS not only improves the developer experience across all workloads, but is also capable of outperforming NUMA-agnostic implementations with average performance improvements of 1.56x. Based on these programming abstractions, this thesis demonstrates that by providing a sufficient degree of abstraction, the accessibility of heterogeneous system resources can be improved for application developers without occluding performance-critical properties of the underlying hardware.}, language = {en} } @article{PlanteuStandlGrossmannetal.2013, author = {Planteu, Lukas and Standl, Bernhard and Grossmann, Wilfried and Neuwirth, Erich}, title = {Integrating school practice in Austrian teacher education}, series = {Commentarii informaticae didacticae : (CID)}, journal = {Commentarii informaticae didacticae : (CID)}, number = {6}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, issn = {1868-0844}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-64628}, pages = {151 -- 152}, year = {2013}, abstract = {We present a concept for better integration of practical teaching into student teacher education in Computer Science. As an introduction to the workshop, different possible scenarios are discussed on the basis of examples. Afterwards, workshop participants will have the opportunity to discuss the application of the concepts in other settings.}, language = {en} } @article{PfitznerSteckhanArnrich2021, author = {Pfitzner, Bjarne and Steckhan, Nico and Arnrich, Bert}, title = {Federated learning in a medical context}, series = {ACM transactions on internet technology : TOIT / Association for Computing}, volume = {21}, journal = {ACM transactions on internet technology : TOIT / Association for Computing}, number = {2}, publisher = {Association for Computing Machinery}, address = {New York}, issn = {1533-5399}, doi = {10.1145/3412357}, pages = {1 -- 31}, year = {2021}, abstract = {Data privacy is a very important issue. Especially in fields like medicine, it is paramount to abide by the existing privacy regulations to preserve patients' anonymity. However, data is required for research and training machine learning models that could help gain insight into complex correlations or personalised treatments that may otherwise stay undiscovered. Those models generally scale with the amount of data available, but the current situation often prohibits building large databases across sites. So it would be beneficial to be able to combine similar or related data from different sites all over the world while still preserving data privacy.
Federated learning has been proposed as a solution for this, because it relies on the sharing of machine learning models, instead of the raw data itself. That means private data never leaves the site or device it was collected on. Federated learning is an emerging research area, and many domains have been identified for the application of those methods. This systematic literature review provides an extensive look at the concept of and research into federated learning and its applicability for confidential healthcare datasets.}, language = {en} } @article{Petre2013, author = {Petre, Marian}, title = {Computing is not a spectator sport}, series = {Commentarii informaticae didacticae : (CID)}, journal = {Commentarii informaticae didacticae : (CID)}, number = {5}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, issn = {1868-0844}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-65045}, pages = {155 -- 159}, year = {2013}, abstract = {This talk will describe My Digital Life (TU100), a distance learning module that introduces computer science through immediate engagement with ubiquitous computing (ubicomp). This talk will describe some of the principles and concepts we have adopted for this modern computing introduction: the idea of the 'informed digital citizen'; engagement through narrative; playful pedagogy; making the power of ubicomp available to novices; setting technical skills in real contexts. It will also trace how the pedagogy is informed by experiences and research in Computer Science education.}, language = {en} } @article{PerugiaPaetzelPruesmannAlanenpaeaeetal.2021, author = {Perugia, Giulia and Paetzel-Pr{\"u}smann, Maike and Alanenp{\"a}{\"a}, Madelene and Castellano, Ginevra}, title = {I can see it in your eyes}, series = {Frontiers in robotics and AI}, volume = {8}, journal = {Frontiers in robotics and AI}, publisher = {Frontiers Media}, address = {Lausanne}, issn = {2296-9144}, doi = {10.3389/frobt.2021.645956}, pages = {18}, year = {2021}, abstract = {Over the past years, extensive research has been dedicated to developing robust platforms and data-driven dialog models to support long-term human-robot interactions. However, little is known about how people's perception of robots and engagement with them develop over time and how these can be accurately assessed through implicit and continuous measurement techniques. In this paper, we explore this by involving participants in three interaction sessions with multiple days of zero exposure in between. Each session consists of a joint task with a robot as well as two short social chats with it before and after the task. We measure participants' gaze patterns with a wearable eye-tracker and gauge their perception of the robot and engagement with it and the joint task using questionnaires. Results disclose that aversion of gaze in a social chat is an indicator of a robot's uncanniness and that the more people gaze at the robot in a joint task, the worse they perform. In contrast with most HRI literature, our results show that gaze toward an object of shared attention, rather than gaze toward a robotic partner, is the most meaningful predictor of engagement in a joint task. Furthermore, the analyses of gaze patterns in repeated interactions disclose that people's mutual gaze in a social chat develops congruently with their perceptions of the robot over time. 
These are key findings for the HRI community as they entail that gaze behavior can be used as an implicit measure of people's perception of robots in a social chat and of their engagement and task performance in a joint task.}, language = {en} } @phdthesis{Perscheid2013, author = {Perscheid, Michael}, title = {Test-driven fault navigation for debugging reproducible failures}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-68155}, school = {Universit{\"a}t Potsdam}, year = {2013}, abstract = {The correction of software failures tends to be very cost-intensive because their debugging is an often time-consuming development activity. During this activity, developers largely attempt to understand what causes failures: Starting with a test case that reproduces the observable failure, they have to follow failure causes on the infection chain back to the root cause (defect). This idealized procedure requires deep knowledge of the system and its behavior because failures and defects can be far apart from each other. Unfortunately, common debugging tools are inadequate for systematically investigating such infection chains in detail. Thus, developers have to rely primarily on their intuition, and the localization of failure causes is not time-efficient. To prevent debugging by disorganized trial and error, experienced developers apply the scientific method and its systematic hypothesis-testing. However, even when using the scientific method, the search for failure causes can still be a laborious task. First, lacking expertise about the system makes it hard to understand incorrect behavior and to create reasonable hypotheses. Second, contemporary debugging approaches provide no or only partial support for the scientific method. In this dissertation, we present test-driven fault navigation as a debugging guide for localizing reproducible failures with the scientific method. Based on the analysis of passing and failing test cases, we reveal anomalies and integrate them into a breadth-first search that leads developers to defects. This systematic search consists of four specific navigation techniques that together support the creation, evaluation, and refinement of failure cause hypotheses for the scientific method. First, structure navigation localizes suspicious system parts and restricts the initial search space. Second, team navigation recommends experienced developers for helping with failures. Third, behavior navigation allows developers to follow emphasized infection chains back to root causes. Fourth, state navigation identifies corrupted state and reveals parts of the infection chain automatically. We implement test-driven fault navigation in our Path Tools framework for the Squeak/Smalltalk development environment and limit its computation cost with the help of our incremental dynamic analysis. This lightweight dynamic analysis ensures an immediate debugging experience with our tools by splitting the run-time overhead over multiple test runs depending on developers' needs.
Hence, our test-driven fault navigation in combination with our incremental dynamic analysis answers important questions in a short time: where to start debugging, who understands failure causes best, what happened before failures, and which state properties are infected.}, language = {en} } @article{Perscheid2021, author = {Perscheid, Cindy}, title = {Integrative biomarker detection on high-dimensional gene expression data sets}, series = {Briefings in bioinformatics}, volume = {22}, journal = {Briefings in bioinformatics}, number = {3}, publisher = {Oxford Univ. Press}, address = {Oxford}, issn = {1467-5463}, doi = {10.1093/bib/bbaa151}, pages = {18}, year = {2021}, abstract = {Gene expression data provide the expression levels of tens of thousands of genes from several hundred samples. These data are analyzed to detect biomarkers that can be of prognostic or diagnostic use. Traditionally, biomarker detection for gene expression data is the task of gene selection. The vast number of genes is reduced to a few relevant ones that achieve the best performance for the respective use case. Traditional approaches select genes based on their statistical significance in the data set. This results in issues of robustness, redundancy and true biological relevance of the selected genes. Integrative analyses typically address these shortcomings by integrating multiple data artifacts from the same objects, e.g. gene expression and methylation data. When only gene expression data are available, integrative analyses instead use curated information on biological processes from public knowledge bases. With knowledge bases providing an ever-increasing amount of curated biological knowledge, such prior knowledge approaches become more powerful. This paper provides a thorough overview on the status quo of biomarker detection on gene expression data with prior biological knowledge. We discuss current shortcomings of traditional approaches, review recent external knowledge bases, provide a classification and qualitative comparison of existing prior knowledge approaches and discuss open challenges for this kind of gene selection.}, language = {en} } @article{Perscheid2021, author = {Perscheid, Cindy}, title = {Comprior}, series = {BMC Bioinformatics}, volume = {22}, journal = {BMC Bioinformatics}, publisher = {Springer Nature}, address = {London}, issn = {1471-2105}, doi = {10.1186/s12859-021-04308-z}, pages = {1 -- 15}, year = {2021}, abstract = {Background Reproducible benchmarking is important for assessing the effectiveness of novel feature selection approaches applied on gene expression data, especially for prior knowledge approaches that incorporate biological information from online knowledge bases. However, no full-fledged benchmarking system exists that is extensible, provides built-in feature selection approaches, and a comprehensive result assessment encompassing classification performance, robustness, and biological relevance. Moreover, the particular needs of prior knowledge feature selection approaches, i.e. uniform access to knowledge bases, are not addressed. As a consequence, prior knowledge approaches are not evaluated amongst each other, leaving open questions regarding their effectiveness. Results We present the Comprior benchmark tool, which facilitates the rapid development and effortless benchmarking of feature selection approaches, with a special focus on prior knowledge approaches. 
Comprior is extensible by custom approaches, offers built-in standard feature selection approaches, enables uniform access to multiple knowledge bases, and provides a customizable evaluation infrastructure to compare multiple feature selection approaches regarding their classification performance, robustness, runtime, and biological relevance. Conclusion Comprior allows reproducible benchmarking, especially of prior knowledge approaches, which facilitates their applicability and for the first time enables a comprehensive assessment of their effectiveness.}, language = {en} } @phdthesis{Perlich2019, author = {Perlich, Anja}, title = {Digital collaborative documentation in mental healthcare}, doi = {10.25932/publishup-44029}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-440292}, school = {Universit{\"a}t Potsdam}, pages = {x, 135}, year = {2019}, abstract = {With the growth of information technology, patient attitudes are shifting away from passively receiving care towards actively taking responsibility for their well-being. Handling doctor-patient relationships collaboratively and providing patients access to their health information are crucial steps in empowering patients. In mental healthcare, the implicit consensus amongst practitioners has been that sharing medical records with patients may have an unpredictable, harmful impact on clinical practice. In order to involve patients more actively in mental healthcare processes, Tele-Board MED (TBM) allows for digital collaborative documentation in therapist-patient sessions. The TBM software system offers a whiteboard-inspired graphical user interface that allows therapist and patient to jointly take notes during the treatment session. Furthermore, it provides features to automatically reuse the digital treatment session notes for the creation of treatment session summaries and clinical case reports. This thesis presents the development of the TBM system and evaluates its effects on 1) the fulfillment of the therapist's duties of clinical case documentation, 2) patient engagement in care processes, and 3) the therapist-patient relationship. Following the design research methodology, TBM was developed and tested in multiple evaluation studies in the domains of cognitive behavioral psychotherapy and addiction care. The results show that therapists are likely to use TBM with patients if they have a technology-friendly attitude and when its use suits the treatment context. Support in carrying out documentation duties as well as fulfilling legal requirements contributes to therapist acceptance. Furthermore, therapists value TBM as a tool to provide a discussion framework and quick access to worksheets during treatment sessions. Therapists express skepticism, however, regarding technology use in patient sessions and towards complete record transparency in general. Patients expect TBM to improve the communication with their therapist and to offer a better recall of discussed topics when taking a copy of their notes home after the session. Patients are doubtful regarding a possible distraction of the therapist and usage in situations when relationship-building is crucial. When applied in a clinical environment, collaborative note-taking with TBM encourages patient engagement and a team feeling between therapist and patient. Furthermore, it increases the patient's acceptance of their diagnosis, which in turn is an important predictor for therapy success.
In summary, TBM has a high potential not only to deliver documentation support and record transparency for patients but also to contribute to a collaborative doctor-patient relationship. This thesis provides design implications for the development of digital collaborative documentation systems in (mental) healthcare as well as recommendations for a successful implementation in clinical practice.}, language = {en} } @article{PerachAlexandron2021, author = {Perach, Shai and Alexandron, Giora}, title = {A MOOC-Based Computer Science Program for Middle School}, series = {EMOOCs 2021}, volume = {2021}, journal = {EMOOCs 2021}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-512-5}, doi = {10.25932/publishup-51713}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-517133}, pages = {111 -- 127}, year = {2021}, abstract = {In an attempt to pave the way for more extensive Computer Science Education (CSE) coverage in K-12, this research developed and made a preliminary evaluation of a blended-learning Introduction to CS program based on an academic MOOC. Using an academic MOOC that is pedagogically effective and engaging, such a program may provide teachers with disciplinary scaffolds and allow them to focus their attention on enhancing students' learning experience and nurturing critical 21st-century skills such as self-regulated learning. As we demonstrate, this enabled us to introduce an academic-level course to middle-school students. In this research, we developed the principles and an initial version of such a program, targeting ninth-graders in science-track classes who learn CS as part of their standard curriculum. We found that the middle-schoolers who participated in the program achieved academic results on par with undergraduate students taking this MOOC for academic credit. Participating students also developed a more accurate perception of the essence of CS as a scientific discipline. The unplanned school closure due to the COVID-19 pandemic outbreak challenged the research but underlined the advantages of such a MOOC-based blended learning program over classic pedagogy in times of global or local crises that lead to school closure. While most of the science-track classes seemed to stop learning CS almost entirely, and the end-of-year MoE exam was discarded, the program's classes smoothly moved to remote learning mode, and students continued to study at a pace similar to that experienced before the school shut down.}, language = {en} } @article{PengLiuWangetal.2018, author = {Peng, Junjie and Liu, Danxu and Wang, Yingtao and Zeng, Ying and Cheng, Feng and Zhang, Wenqiang}, title = {Weight-based strategy for an I/O-intensive application at a cloud data center}, series = {Concurrency and computation : practice \& experience}, volume = {30}, journal = {Concurrency and computation : practice \& experience}, number = {19}, publisher = {Wiley}, address = {Hoboken}, issn = {1532-0626}, doi = {10.1002/cpe.4648}, pages = {14}, year = {2018}, abstract = {Applications with different characteristics in the cloud may have different resource preferences. However, traditional resource allocation and scheduling strategies rarely take into account the characteristics of applications.
Considering that I/O-intensive applications are a typical type of cloud application and that frequent I/O accesses, especially random disk accesses to small files, may lead to an inefficient use of resources and reduce the quality of service (QoS) of applications, a weight allocation strategy is proposed based on the available resources that a physical server can provide as well as on the characteristics of the applications. Using the weight obtained, a resource allocation and scheduling strategy is presented based on the specific application characteristics in the data center. Extensive experiments show that the strategy is correct and can guarantee a high rate of I/O operations per second (IOPS) in a cloud data center with high QoS. Additionally, the strategy can effectively improve the utilization of the disk and other resources of the data center without affecting the service quality of applications.}, language = {en} } @article{PawassarTiberius2021, author = {Pawassar, Christian Matthias and Tiberius, Victor}, title = {Virtual reality in health care}, series = {JMIR Serious Games}, volume = {9}, journal = {JMIR Serious Games}, edition = {4}, publisher = {JMIR Publications}, address = {Toronto, Canada}, issn = {2291-9279}, doi = {10.2196/32721}, pages = {1 -- 19}, year = {2021}, abstract = {Background: Research into the application of virtual reality technology in the health care sector has rapidly increased, resulting in a large body of research that is difficult to keep up with. Objective: We will provide an overview of the annual publication numbers in this field and the most productive and influential countries, journals, and authors, as well as the most used, most co-occurring, and most recent keywords. Methods: Based on a data set of 356 publications and 20,363 citations derived from Web of Science, we conducted a bibliometric analysis using BibExcel, HistCite, and VOSviewer. Results: The strongest growth in publications occurred in 2020, accounting for 29.49\% of all publications so far. The most productive countries are the United States, the United Kingdom, and Spain; the most influential countries are the United States, Canada, and the United Kingdom. The most productive journals are the Journal of Medical Internet Research (JMIR), JMIR Serious Games, and the Games for Health Journal; the most influential journals are Patient Education and Counselling, Medical Education, and Quality of Life Research. The most productive authors are Riva, del Piccolo, and Schwebel; the most influential authors are Finset, del Piccolo, and Eide. The most frequently occurring keywords other than "virtual" and "reality" are "training," "trial," and "patients." The most relevant research themes are communication, education, and novel treatments; the most recent research trends are fitness and exergames.
Conclusions: The analysis shows that the field has left its infancy and its specialization is advancing, with a clear focus on patient usability.}, language = {en} } @article{PassigTzurielKedmi2015, author = {Passig, David and Tzuriel, David and Kedmi, Ganit Eshel}, title = {Improving children's Cognitive Modifiability through Mediated Learning and Dynamic Assessment within 3D Immersive Virtual Reality Environment}, series = {KEYCIT 2014 - Key Competencies in Informatics and ICT}, journal = {KEYCIT 2014 - Key Competencies in Informatics and ICT}, number = {7}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, issn = {1868-0844}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-82705}, pages = {235 -- 252}, year = {2015}, abstract = {The objectives of this study were to examine (a) the effect of dynamic assessment (DA) in a 3D Immersive Virtual Reality (IVR) environment as compared with computerized 2D and noncomputerized (NC) situations on cognitive modifiability, and (b) the transfer effects of these conditions on more difficult problem solving administered two weeks later in a non-computerized environment. A sample of 117 children aged 6:6-9:0 years was randomly assigned into three experimental groups of DA conditions: 3D, 2D, and NC, and one control group (C). All groups received the pre- and post-teaching Analogies subtest of the Cognitive Modifiability Battery (CMB-AN). The experimental groups received a teaching phase in conditions similar to the pre- and post-teaching phases. The findings showed that cognitive modifiability, in a 3D IVR, was distinctively higher than in the two other experimental groups (2D computer group and NC group). It was also found that the 3D group showed significantly higher performance in transfer problems than the 2D and NC groups.}, language = {en} } @book{PapeTrefferHirschfeldetal.2013, author = {Pape, Tobias and Treffer, Arian and Hirschfeld, Robert and Haupt, Michael}, title = {Extending a Java Virtual Machine to Dynamic Object-oriented Languages}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-266-7}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-67438}, publisher = {Universit{\"a}t Potsdam}, pages = {163}, year = {2013}, abstract = {There are two common approaches to implement a virtual machine (VM) for a dynamic object-oriented language. On the one hand, it can be implemented in a C-like language for best performance and maximum control over the resulting executable. On the other hand, it can be implemented in a language such as Java that allows for higher-level abstractions. These abstractions, such as proper object-oriented modularization, automatic memory management, or interfaces, are missing in C-like languages but they can simplify the implementation of prevalent but complex concepts in VMs, such as garbage collectors (GCs) or just-in-time compilers (JITs). Yet, the implementation of a dynamic object-oriented language in Java eventually results in two VMs on top of each other (double stack), which impedes performance. For statically typed languages, the Maxine VM solves this problem; it is written in Java but can be executed without a Java virtual machine (JVM). However, it is currently not possible to execute dynamic object-oriented languages in Maxine. This work presents an approach to bringing object models and execution models of dynamic object-oriented languages to the Maxine VM and the application of this approach to Squeak/Smalltalk.
The representation of objects in, and the execution of, dynamic object-oriented languages pose certain challenges to the Maxine VM, which lacks the variation points necessary to enable an effortless and straightforward implementation of the execution models of dynamic object-oriented languages. The implementation of Squeak/Smalltalk in Maxine serves as a feasibility study to unveil such missing variation points.}, language = {en} } @phdthesis{Pape2021, author = {Pape, Tobias}, title = {Efficient compound values in virtual machines}, doi = {10.25932/publishup-49913}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-499134}, school = {Universit{\"a}t Potsdam}, pages = {xxix, 242}, year = {2021}, abstract = {Compound values are not universally supported in virtual machine (VM)-based programming systems and languages. However, providing data structures with value characteristics can be beneficial. On one hand, programming systems and languages can adequately represent physical quantities with compound values and avoid inconsistencies, for example, in the representation of large numbers. On the other hand, just-in-time (JIT) compilers, which are often found in VMs, can rely on the fact that compound values are immutable, which is an important property in optimizing programs. Considering this, compound values have an optimization potential that can be put to use by implementing them in VMs in a way that is efficient in memory usage and execution time. Yet, optimized compound values in VMs face certain challenges: to maintain consistency, it should not be observable by the program whether compound values are represented in an optimized way by a VM; an optimization should take into account that the usage of compound values can exhibit certain patterns at run-time; and value-incompatible properties that are necessary due to implementation restrictions should be reduced. We propose a technique to detect and compress common patterns of compound value usage at run-time to improve memory usage and execution speed. Our approach identifies patterns of frequent compound value references and introduces abbreviated forms for them. Thus, it is possible to store multiple inter-referenced compound values in an inlined memory representation, reducing the overhead of metadata and object references. We extend our approach by a notion of limited mutability, using cells that act as barriers for our approach and provide a location for shared, mutable access with the possibility of type specialization. We devise an extension to our approach that allows us to express automatic unboxing of boxed primitive data types in terms of our initial technique. We show that our approach is versatile enough to express another optimization technique that relies on values, such as Booleans, that are unique throughout a programming system. Furthermore, we demonstrate how to re-use learned usage patterns and optimizations across program runs, thus reducing the performance impact of pattern recognition. We show in a best-case prototype that the implementation of our approach is feasible and can also be applied to general-purpose programming systems, namely implementations of the Racket language and Squeak/Smalltalk.
In several micro-benchmarks, we found that our approach can effectively reduce memory consumption and improve execution speed.}, language = {en} } @misc{PanzerBenderGronau2022, author = {Panzer, Marcel and Bender, Benedict and Gronau, Norbert}, title = {Neural agent-based production planning and control}, series = {Zweitver{\"o}ffentlichungen der Universit{\"a}t Potsdam : Wirtschafts- und Sozialwissenschaftliche Reihe}, journal = {Zweitver{\"o}ffentlichungen der Universit{\"a}t Potsdam : Wirtschafts- und Sozialwissenschaftliche Reihe}, issn = {1867-5808}, doi = {10.25932/publishup-60477}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-604777}, pages = {26}, year = {2022}, abstract = {Nowadays, production planning and control must cope with mass customization, increased fluctuations in demand, and high competition pressures. Despite prevailing market risks, planning accuracy and increased adaptability in the event of disruptions or failures must be ensured, while simultaneously optimizing key process indicators. To manage that complex task, neural networks that can process large quantities of high-dimensional data in real time have been widely adopted in recent years. Although these are already extensively deployed in production systems, a systematic review of applications and implemented agent embeddings and architectures has not yet been conducted. The main contribution of this paper is to provide researchers and practitioners with an overview of applications and applied embeddings and to motivate further research in neural agent-based production. Findings indicate that neural agents are not only deployed in diverse applications, but are also increasingly implemented in multi-agent environments or in combination with conventional methods, improving performance relative to benchmarks and reducing dependence on human experience. This implies not only a more sophisticated focus on distributed production resources but also a broadening of the perspective from a local to a global scale. Nevertheless, future research must further increase scalability and reproducibility to guarantee a simplified transfer of results to reality.}, language = {en} } @article{PanzerBenderGronau2022, author = {Panzer, Marcel and Bender, Benedict and Gronau, Norbert}, title = {Neural agent-based production planning and control}, series = {Journal of Manufacturing Systems}, volume = {65}, journal = {Journal of Manufacturing Systems}, publisher = {Elsevier}, address = {Amsterdam}, issn = {0278-6125}, doi = {10.1016/j.jmsy.2022.10.019}, pages = {743 -- 766}, year = {2022}, abstract = {Nowadays, production planning and control must cope with mass customization, increased fluctuations in demand, and high competition pressures. Despite prevailing market risks, planning accuracy and increased adaptability in the event of disruptions or failures must be ensured, while simultaneously optimizing key process indicators. To manage that complex task, neural networks that can process large quantities of high-dimensional data in real time have been widely adopted in recent years. Although these are already extensively deployed in production systems, a systematic review of applications and implemented agent embeddings and architectures has not yet been conducted. The main contribution of this paper is to provide researchers and practitioners with an overview of applications and applied embeddings and to motivate further research in neural agent-based production.
Findings indicate that neural agents are not only deployed in diverse applications, but are also increasingly implemented in multi-agent environments or in combination with conventional methods, improving performance relative to benchmarks and reducing dependence on human experience. This implies not only a more sophisticated focus on distributed production resources but also a broadening of the perspective from a local to a global scale. Nevertheless, future research must further increase scalability and reproducibility to guarantee a simplified transfer of results to reality.}, language = {en} } @inproceedings{PalixLawallThomasetal.2010, author = {Palix, Nicolas and Lawall, Julia L. and Thomas, Ga{\"e}l and Muller, Gilles}, title = {How Often do Experts Make Mistakes?}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-41327}, year = {2010}, abstract = {Large open-source software projects involve developers with a wide variety of backgrounds and expertise. Such software projects furthermore include many internal APIs that developers must understand and use properly. According to the intended purpose of these APIs, they are more or less frequently used, and used by developers with more or less expertise. In this paper, we study the impact of usage patterns and developer expertise on the rate of defects occurring in the use of internal APIs. For this preliminary study, we focus on memory management APIs in the Linux kernel, as the use of these has been shown to be highly error prone in previous work. We study defect rates and developer expertise, to consider, e.g., whether widely used APIs are more defect prone because they are used by less experienced developers, or whether defects in widely used APIs are more likely to be fixed.}, language = {en} } @article{PaetzelPruesmannPerugiaCastellano2021, author = {Paetzel-Pr{\"u}smann, Maike and Perugia, Giulia and Castellano, Ginevra}, title = {The influence of robot personality on the development of uncanny feelings}, series = {Computers in human behavior}, volume = {120}, journal = {Computers in human behavior}, publisher = {Elsevier}, address = {Amsterdam}, issn = {0747-5632}, doi = {10.1016/j.chb.2021.106756}, pages = {17}, year = {2021}, abstract = {Empirical investigations on the uncanny valley have almost solely focused on the analysis of people's noninteractive perception of a robot at first sight. Recent studies suggest, however, that these uncanny first impressions may be significantly altered over an interaction. What is yet to be discovered is whether certain interaction patterns can lead to a faster decline in uncanny feelings. In this paper, we present a study in which participants with limited expertise in Computer Science played a collaborative geography game with a Furhat robot. During the game, Furhat displayed one of two personalities, which corresponded to two different interaction strategies. The robot was either optimistic and encouraging, or impatient and provocative. We performed the study in a science museum and recruited participants among the visitors. Our findings suggest that a robot that is rated high on agreeableness, emotional stability, and conscientiousness can indeed weaken uncanny feelings. This study has important implications for human-robot interaction design as it further highlights that a first impression, merely based on a robot's appearance, is not indicative of the affinity people might develop towards it throughout an interaction.
We thus argue that future work should emphasize investigations of the exact interaction patterns that can help to overcome uncanny feelings.}, language = {en} } @misc{OstrowskiSchaub2012, author = {Ostrowski, Max and Schaub, Torsten H.}, title = {ASP modulo CSP}, series = {Postprints der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe}, journal = {Postprints der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe}, number = {579}, issn = {1866-8372}, doi = {10.25932/publishup-41390}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-413908}, pages = {19}, year = {2012}, abstract = {We present the hybrid ASP solver clingcon, combining the simple modeling language and the high-performance Boolean solving capabilities of Answer Set Programming (ASP) with techniques for using non-Boolean constraints from the area of Constraint Programming (CP). The new clingcon system features an extended syntax supporting global constraints and optimize statements for constraint variables. The major technical innovation improves the interaction between the ASP and CP solvers through elaborate learning techniques based on irreducible inconsistent sets. A broad empirical evaluation shows that these techniques yield a performance improvement of an order of magnitude.}, language = {en} } @article{OrBach2015, author = {Or-Bach, Rachel}, title = {Programming for Non-Programmers}, series = {KEYCIT 2014 - Key Competencies in Informatics and ICT}, journal = {KEYCIT 2014 - Key Competencies in Informatics and ICT}, number = {7}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, issn = {1868-0844}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-82875}, pages = {335 -- 342}, year = {2015}, abstract = {The study reported in this paper involved the employment of specific in-class exercises using a Personal Response System (PRS). These exercises were designed with two goals: to enhance students' capabilities of tracing given code and of explaining given code in natural language with some abstraction. The paper presents evidence from the actual use of the PRS along with students' subjective impressions regarding both the use of the PRS and the special exercises. The conclusions from the findings are followed by a short discussion of the benefits of PRS-based mental processing exercises for learning programming and beyond.}, language = {en} } @article{OpelKramerTrommenetal.2015, author = {Opel, Simone and Kramer, Matthias and Trommen, Michael and Pottb{\"a}cker, Florian and Ilaghef, Youssef}, title = {BugHunt}, series = {KEYCIT 2014 - Key Competencies in Informatics and ICT}, journal = {KEYCIT 2014 - Key Competencies in Informatics and ICT}, number = {7}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, issn = {1868-0844}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-82693}, pages = {217 -- 233}, year = {2015}, abstract = {Competencies related to operating systems and computer security are usually taught systematically. In this paper we present a different approach, in which students have to remove virus-like behaviour on their respective computers, which has been induced by software developed for this purpose. They have to develop appropriate problem-solving strategies and thereby explore essential elements of the operating system.
The approach was implemented as an example in two computer science courses at a regional general upper secondary school and elicited great motivation and interest among the participating students.}, language = {en} } @article{Opel2015, author = {Opel, Simone}, title = {On the Way to a "General Model of Contextualised Computer Science Education"}, series = {KEYCIT 2014 - Key Competencies in Informatics and ICT}, journal = {KEYCIT 2014 - Key Competencies in Informatics and ICT}, number = {7}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, issn = {1868-0844}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-82953}, pages = {397 -- 400}, year = {2015}, language = {en} } @article{OosthoekDoerr2021, author = {Oosthoek, Kris and D{\"o}rr, Christian}, title = {Cyber security threats to bitcoin exchanges}, series = {IEEE transactions on network and service management : a publication of the IEEE}, volume = {18}, journal = {IEEE transactions on network and service management : a publication of the IEEE}, number = {2}, publisher = {IEEE}, address = {New York}, issn = {1932-4537}, doi = {10.1109/TNSM.2020.3046145}, pages = {1616 -- 1628}, year = {2021}, abstract = {Bitcoin is gaining traction as an alternative store of value. Its market capitalization exceeds that of all other cryptocurrencies in the market. But its high monetary value also makes it an attractive target for cyber criminal actors. Hacking campaigns usually target an ecosystem's weakest points. In Bitcoin, the exchange platforms are among them. Each exchange breach is a threat not only to direct victims, but to the credibility of Bitcoin's entire ecosystem. Based on an extensive analysis of 36 breaches of Bitcoin exchanges, we show the attack patterns used to exploit Bitcoin exchange platforms, documented using an industry standard for reporting intelligence on cyber security breaches. Based on this, we provide an overview of the most common attack vectors, showing that all except three hacks were possible due to relatively lax security. We show that while the security regimen of Bitcoin exchanges is subpar compared to other financial service providers, the use of stolen credentials, which does not require any hacking, is decreasing. We also show that the amount of BTC taken during a breach is decreasing, as is the number of exchanges that terminate after being breached. Furthermore, we show that overall security posture has improved, but still has major flaws. To discover adversarial methods post-breach, we have analyzed two cases of BTC laundering. Through this analysis we provide insight into how exchange platforms with lax cyber security further increase the intermediary risk they introduce into the Bitcoin ecosystem.}, language = {en} } @article{OmranianAngeleskaNikoloski2021, author = {Omranian, Sara and Angeleska, Angela and Nikoloski, Zoran}, title = {PC2P}, series = {Bioinformatics}, volume = {37}, journal = {Bioinformatics}, number = {1}, publisher = {Oxford Univ. Press}, address = {Oxford}, issn = {1367-4811}, doi = {10.1093/bioinformatics/btaa1089}, pages = {73 -- 81}, year = {2021}, abstract = {Motivation: Prediction of protein complexes from protein-protein interaction (PPI) networks is an important problem in systems biology, as they control different cellular functions. The existing solutions employ algorithms for network community detection that identify dense subgraphs in PPI networks.
However, gold standards in yeast and human indicate that protein complexes can also induce sparse subgraphs, introducing further challenges in protein complex prediction. Results: To address this issue, we formalize protein complexes as biclique spanned subgraphs, which include both sparse and dense subgraphs. We then cast the problem of protein complex prediction as a network partitioning into biclique spanned subgraphs with removal of a minimum number of edges, called a coherent partition. Since finding a coherent partition is a computationally intractable problem, we devise a parameter-free greedy approximation algorithm, termed Protein Complexes from Coherent Partition (PC2P), based on key properties of biclique spanned subgraphs. Through comparison with nine contenders, we demonstrate that PC2P: (i) successfully identifies modular structure in networks, as a prerequisite for protein complex prediction, (ii) outperforms the existing solutions with respect to a composite score of five performance measures on 75\% and 100\% of the analyzed PPI networks and gold standards in yeast and human, respectively, and (iii, iv) does not compromise GO semantic similarity and enrichment score of the predicted protein complexes. Therefore, our study demonstrates that clustering of networks in terms of biclique spanned subgraphs is a promising framework for detection of complexes in PPI networks.}, language = {en} } @article{Ohrndorf2015, author = {Ohrndorf, Laura}, title = {Assignments in Computer Science Education}, series = {KEYCIT 2014 - Key Competencies in Informatics and ICT}, journal = {KEYCIT 2014 - Key Competencies in Informatics and ICT}, number = {7}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, issn = {1868-0844}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-82868}, pages = {327 -- 333}, year = {2015}, abstract = {In this paper we describe the current state of our research project concerning computer science teachers' knowledge about students' cognition. We did a comprehensive analysis of textbooks, curricula and other resources that give teachers guidance in formulating assignments. In comparison to other subjects, there are only a few concepts and strategies taught to prospective computer science teachers at university. We summarize them and give an overview of our empirical approach to measure this knowledge.}, language = {en} } @inproceedings{OetschSchwengererTompits2010, author = {Oetsch, Johannes and Schwengerer, Martin and Tompits, Hans}, title = {Kato: a plagiarism-detection tool for answer-set programs}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-41485}, year = {2010}, abstract = {We present the tool Kato which is, to the best of our knowledge, the first tool for plagiarism detection that is directly tailored for answer-set programming (ASP). Kato aims at finding similarities between (segments of) logic programs to help detect cases of plagiarism. Currently, the tool is realised for DLV programs but it is designed to handle various logic-programming syntax versions.
We review basic features and the underlying methodology of the tool.}, language = {en} } @article{NylenDoerge2013, author = {Nyl{\´e}n, Aletta and D{\"o}rge, Christina}, title = {Using competencies to structure scientific writing education}, series = {Commentarii informaticae didacticae : (CID)}, journal = {Commentarii informaticae didacticae : (CID)}, number = {5}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, issn = {1868-0844}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-64851}, pages = {33 -- 44}, year = {2013}, abstract = {Scientific writing is an important skill for computer science and computer engineering professionals. In this paper we present a writing-across-the-curriculum program directed towards scientific writing. The program is built around a hierarchy of learning outcomes. The hierarchy is constructed by analyzing the learning outcomes in relation to the competencies that are needed to fulfill them.}, language = {en} } @article{Noack2014, author = {Noack, Franziska}, title = {CREADED: Colored-Relief application for digital elevation data}, series = {Process design for natural scientists: an agile model-driven approach}, journal = {Process design for natural scientists: an agile model-driven approach}, number = {500}, publisher = {Springer}, address = {Berlin}, isbn = {978-3-662-45005-5}, issn = {1865-0929}, pages = {186 -- 199}, year = {2014}, abstract = {In the geoinformatics field, remote sensing data is often used for analyzing the characteristics of the current investigation area. This includes DEMs, which are simple raster grids containing grey scales representing the respective elevation values. The project CREADED presented in this paper aims at making these monochrome raster images more meaningful and more intuitively interpretable. For this purpose, an executable interactive model for creating a colored and relief-shaded Digital Elevation Model (DEM) has been designed using the jABC framework. The process is based on standard jABC-SIBs and SIBs that provide specific GIS functions, which are available as Web services, command line tools and scripts.}, language = {en} } @book{NiephausFelgentreffHirschfeld2017, author = {Niephaus, Fabio and Felgentreff, Tim and Hirschfeld, Robert}, title = {Squimera}, number = {120}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-422-7}, doi = {10.25932/publishup-40338}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-403387}, publisher = {Universit{\"a}t Potsdam}, pages = {92}, year = {2017}, abstract = {Programming tools that support multiple programming languages and can be used consistently are helpful for software developers, because they do not first have to familiarize themselves with new tools whenever they want to develop in a new language. It is also useful to be able to combine multiple programming languages in one application, since developers then do not have to re-implement software frameworks and libraries in the respective language and can instead reuse existing software. Nevertheless, developers face a very large selection when searching for tools, some of which are moreover designed for only a single language.
Some integrated development environments support multiple programming languages but often cannot guarantee a consistent user experience across their tools, because the languages' respective execution environments are too different. In addition, there are already mechanisms that allow programs written in other languages to be reused within a program, often via the operating system or a network connection. However, programming tools frequently do not support such indirection and are therefore of limited use in, for example, debugging scenarios. In this work, we present a novel approach intended to improve the programming experience when working with multiple dynamic programming languages. To this end, we reuse the tools of a Smalltalk programming environment and develop a virtual execution environment that supports different languages equally. Squimera, the prototype based on our approach, demonstrates that it is possible to reuse programming tools in such a way that they behave the same across different programming languages, thus simplifying developers' work. In addition, Squimera enables straightforward reuse and even mixing of software libraries and frameworks written in different languages, and additionally supports debugging across multiple languages.}, language = {en} } @phdthesis{Niephaus2022, author = {Niephaus, Fabio}, title = {Exploratory tool-building platforms for polyglot virtual machines}, doi = {10.25932/publishup-57177}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-571776}, school = {Universit{\"a}t Potsdam}, pages = {xxi, 249}, year = {2022}, abstract = {Polyglot programming allows developers to use multiple programming languages within the same software project. While it is common to use more than one language in certain programming domains, developers also apply polyglot programming for other purposes, such as reusing software written in other languages. Although established approaches to polyglot programming come with significant limitations, for example, in terms of performance and tool support, developers still use them to be able to combine languages. Polyglot virtual machines (VMs) such as GraalVM provide a new level of polyglot programming, allowing languages to directly interact with each other. This reduces the amount of glue code needed to combine languages, results in better performance, and enables tools such as debuggers to work across languages. However, only little research has focused on novel tools that are designed to support developers in building software with polyglot VMs. One reason is that tool-building is often an expensive activity; another is that polyglot VMs are still a moving target as their use cases and requirements are not yet well understood. In this thesis, we present an approach that builds on existing self-sustaining programming systems such as Squeak/Smalltalk to enable exploratory programming, a practice for exploring and gathering software requirements, and reuse their extensive tool-building capabilities in the context of polyglot VMs. Based on TruffleSqueak, our implementation for the GraalVM, we further present five case studies that demonstrate how our approach helps tool developers to design and build tools for polyglot programming.
We further show that TruffleSqueak can also be used by application developers to build and evolve polyglot applications at runtime and by language and runtime developers to understand the dynamic behavior of GraalVM languages and internals. Since our platform allows all these developers to apply polyglot programming, it can further help to better understand the advantages, use cases, requirements, and challenges of polyglot VMs. Moreover, we demonstrate that our approach can also be applied to other polyglot VMs and that insights gained through it are transferable to other programming systems. We conclude that our research on tools for polyglot programming is an important step toward making polyglot VMs more approachable for developers in practice. With good tool support, we believe polyglot VMs can make it much more common for developers to take advantage of multiple languages and their ecosystems when building software.}, language = {en} } @book{NienhausGoochDoellner2006, author = {Nienhaus, Marc and Gooch, Bruce and D{\"o}llner, J{\"u}rgen Roland Friedrich}, title = {Visualizing movement dynamics in virtual urban environments}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-939469-52-0}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-33065}, publisher = {Universit{\"a}t Potsdam}, pages = {7}, year = {2006}, abstract = {Dynamics in urban environments encompasses complex processes and phenomena such as those related to movement (e.g., traffic, people) and development (e.g., construction, settlement). This paper presents novel methods for creating human-centric illustrative maps for visualizing the movement dynamics in virtual 3D environments. The methods allow a viewer to gain rapid insight into traffic density and flow. The illustrative maps represent vehicle behavior as light threads. Light threads are a familiar visual metaphor caused by moving light sources producing streaks in a long-exposure photograph. A vehicle's front and rear lights produce light threads that convey its direction of motion as well as its velocity and acceleration. The accumulation of light threads allows a viewer to quickly perceive traffic flow and density. The light-thread technique is a key element to effective visualization systems for analytic reasoning, exploration, and monitoring of geospatial processes.}, language = {en} } @book{NeuhausPolzeChowdhuryy2011, author = {Neuhaus, Christian and Polze, Andreas and Chowdhuryy, Mohammad M. R.}, title = {Survey on healthcare IT systems : standards, regulations and security}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-128-8}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-51463}, publisher = {Universit{\"a}t Potsdam}, pages = {53}, year = {2011}, abstract = {IT systems for healthcare are a complex and exciting field. On the one hand, there is a vast number of improvements and labor-saving possibilities that computers can bring to everyday healthcare. Some forms of treatment, diagnosis, and organisational tasks were even made possible by computer usage in the first place. On the other hand, there are many factors that encumber computer usage and make development of IT systems for healthcare a challenging, sometimes even frustrating task. These factors are not solely technology-related; social and economic conditions play just as large a role.
This report describes some of the idiosyncrasies of IT systems in the healthcare domain, with a special focus on legal regulations, standards and security.}, language = {en} } @misc{NeherKniepertElimelechetal.2016, author = {Neher, Dieter and Kniepert, Juliane and Elimelech, Arik and Koster, L. Jan Anton}, title = {A New Figure of Merit for Organic Solar Cells with Transport-limited Photocurrents}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-91414}, pages = {9}, year = {2016}, abstract = {Compared to their inorganic counterparts, organic semiconductors suffer from relatively low charge carrier mobilities. Therefore, expressions derived for inorganic solar cells to correlate characteristic performance parameters to material properties are prone to fail when applied to organic devices. This is especially true for the classical Shockley equation commonly used to describe current-voltage (JV) curves, as it assumes a high electrical conductivity of the charge transporting material. Here, an analytical expression for the JV curves of organic solar cells is derived based on a previously published analytical model. This expression, bearing a functional dependence similar to that of the Shockley equation, delivers a new figure of merit α to express the balance between free charge recombination and extraction in low-mobility photoactive materials. This figure of merit is shown to determine critical device parameters such as the apparent series resistance and the fill factor.}, language = {en} } @article{NeherKniepertElimelechetal.2016, author = {Neher, Dieter and Kniepert, Juliane and Elimelech, Arik and Koster, L. Jan Anton}, title = {A New Figure of Merit for Organic Solar Cells with Transport-limited Photocurrents}, series = {Scientific reports}, volume = {6}, journal = {Scientific reports}, publisher = {Nature Publishing Group}, address = {London}, issn = {2045-2322}, doi = {10.1038/srep24861}, pages = {9}, year = {2016}, abstract = {Compared to their inorganic counterparts, organic semiconductors suffer from relatively low charge carrier mobilities. Therefore, expressions derived for inorganic solar cells to correlate characteristic performance parameters to material properties are prone to fail when applied to organic devices. This is especially true for the classical Shockley equation commonly used to describe current-voltage (JV) curves, as it assumes a high electrical conductivity of the charge transporting material. Here, an analytical expression for the JV curves of organic solar cells is derived based on a previously published analytical model. This expression, bearing a functional dependence similar to that of the Shockley equation, delivers a new figure of merit α to express the balance between free charge recombination and extraction in low-mobility photoactive materials. This figure of merit is shown to determine critical device parameters such as the apparent series resistance and the fill factor.}, language = {en} }