@book{BauckmannLeserNaumann2010,
  author       = {Bauckmann, Jana and Leser, Ulf and Naumann, Felix},
  title        = {Efficient and exact computation of inclusion dependencies for data integration},
  publisher    = {Universit{\"a}tsverlag Potsdam},
  address      = {Potsdam},
  isbn         = {978-3-86956-048-9},
  url          = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-41396},
  institution  = {Universit{\"a}t Potsdam},
  pages        = {36},
  year         = {2010},
  abstract     = {Data obtained from foreign data sources often come with only superficial structural information, such as relation names and attribute names. Other types of metadata that are important for effective integration and meaningful querying of such data sets are missing. In particular, relationships among attributes, such as foreign keys, are crucial metadata for understanding the structure of an unknown database. The discovery of such relationships is difficult, because in principle for each pair of attributes in the database each pair of data values must be compared. A precondition for a foreign key is an inclusion dependency (IND) between the key and the foreign key attributes. We present with Spider an algorithm that efficiently finds all INDs in a given relational database. It leverages the sorting facilities of DBMS but performs the actual comparisons outside of the database to save computation. Spider analyzes very large databases up to an order of magnitude faster than previous approaches. We also evaluate in detail the effectiveness of several heuristics to reduce the number of necessary comparisons. Furthermore, we generalize Spider to find composite INDs covering multiple attributes, and partial INDs, which are true INDs for all but a certain number of values. This last type is particularly relevant when integrating dirty data as is often the case in the life sciences domain - our driving motivation.},
  language     = {en}
}

@phdthesis{Becker2013,
  author       = {Becker, Basil},
  title        = {Architectural modelling and verification of open service-oriented systems of systems},
  url          = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-70158},
  school       = {Universit{\"a}t Potsdam},
  year         = {2013},
  abstract     = {Systems of Systems (SoS) have received a lot of attention recently. In this thesis we will focus on SoS that are built atop the techniques of Service-Oriented Architectures and thus combine the benefits and challenges of both paradigms. For this thesis we will understand SoS as ensembles of single autonomous systems that are integrated to a larger system, the SoS. The interesting fact about these systems is that the previously isolated systems are still maintained, improved and developed on their own. Structural dynamics is an issue in SoS, as at every point in time systems can join and leave the ensemble. This and the fact that the cooperation among the constituent systems is not necessarily observable means that we will consider these systems as open systems. Of course, the system has a clear boundary at each point in time, but this can only be identified by halting the complete SoS. However, halting a system of that size is practically impossible. Often SoS are combinations of software systems and physical systems. Hence a failure in the software system can have a serious physical impact what makes an SoS of this kind easily a safety-critical system. The contribution of this thesis is a modelling approach that extends OMG's SoaML and basically relies on collaborations and roles as an abstraction layer above the components. This will allow us to describe SoS at an architectural level. We will also give a formal semantics for our modelling approach which employs hybrid graph-transformation systems. The modelling approach is accompanied by a modular verification scheme that will be able to cope with the complexity constraints implied by the SoS' structural dynamics and size. Building such autonomous systems as SoS without evolution at the architectural level --- i. e. adding and removing of components and services --- is inadequate. Therefore our approach directly supports the modelling and verification of evolution.},
  language     = {en}
}

@book{BeckerGiese2012,
  author       = {Becker, Basil and Giese, Holger},
  title        = {Cyber-physical systems with dynamic structure : towards modeling and verification of inductive invariants},
  publisher    = {Universit{\"a}tsverlag Potsdam},
  address      = {Potsdam},
  isbn         = {978-3-86956-217-9},
  url          = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-62437},
  institution  = {Universit{\"a}t Potsdam},
  pages        = {iv, 27},
  year         = {2012},
  abstract     = {Cyber-physical systems achieve sophisticated system behavior exploring the tight interconnection of physical coupling present in classical engineering systems and information technology based coupling. A particular challenging case are systems where these cyber-physical systems are formed ad hoc according to the specific local topology, the available networking capabilities, and the goals and constraints of the subsystems captured by the information processing part. In this paper we present a formalism that permits to model the sketched class of cyber-physical systems. The ad hoc formation of tightly coupled subsystems of arbitrary size are specified using a UML-based graph transformation system approach. Differential equations are employed to define the resulting tightly coupled behavior. Together, both form hybrid graph transformation systems where the graph transformation rules define the discrete steps where the topology or modes may change, while the differential equations capture the continuous behavior in between such discrete changes. In addition, we demonstrate that automated analysis techniques known for timed graph transformation systems for inductive invariants can be extended to also cover the hybrid case for an expressive case of hybrid models where the formed tightly coupled subsystems are restricted to smaller local networks.},
  language     = {en}
}

@book{BeckerGieseNeumann2009,
  author       = {Becker, Basil and Giese, Holger and Neumann, Stefan},
  title        = {Correct dynamic service-oriented architectures : modeling and compositional verification with dynamic collaborations},
  organization = {System Analysis and Modeling Group},
  isbn         = {978-3-940793-91-1},
  url          = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-30473},
  publisher    = {Universit{\"a}t Potsdam},
  year         = {2009},
  abstract     = {Service-oriented modeling employs collaborations to capture the coordination of multiple roles in form of service contracts. In case of dynamic collaborations the roles may join and leave the collaboration at runtime and therefore complex structural dynamics can result, which makes it very hard to ensure their correct and safe operation. We present in this paper our approach for modeling and verifying such dynamic collaborations. Modeling is supported using a well-defined subset of UML class diagrams, behavioral rules for the structural dynamics, and UML state machines for the role behavior. To be also able to verify the resulting service-oriented systems, we extended our former results for the automated verification of systems with structural dynamics [7, 8] and developed a compositional reasoning scheme, which enables the reuse of verification results. We outline our approach using the example of autonomous vehicles that use such dynamic collaborations via ad-hoc networking to coordinate and optimize their joint behavior.},
  language     = {en}
}

@phdthesis{Berg2013,
  author       = {Berg, Gregor},
  title        = {Virtual prototypes for the model-based elicitation and validation of collaborative scenarios},
  url          = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-69729},
  school       = {Universit{\"a}t Potsdam},
  year         = {2013},
  abstract     = {Requirements engineers have to elicit, document, and validate how stakeholders act and interact to achieve their common goals in collaborative scenarios. Only after gathering all information concerning who interacts with whom to do what and why, can a software system be designed and realized which supports the stakeholders to do their work. To capture and structure requirements of different (groups of) stakeholders, scenario-based approaches have been widely used and investigated. Still, the elicitation and validation of requirements covering collaborative scenarios remains complicated, since the required information is highly intertwined, fragmented, and distributed over several stakeholders. Hence, it can only be elicited and validated collaboratively. In times of globally distributed companies, scheduling and conducting workshops with groups of stakeholders is usually not feasible due to budget and time constraints. Talking to individual stakeholders, on the other hand, is feasible but leads to fragmented and incomplete stakeholder scenarios. Going back and forth between different individual stakeholders to resolve this fragmentation and explore uncovered alternatives is an error-prone, time-consuming, and expensive task for the requirements engineers. While formal modeling methods can be employed to automatically check and ensure consistency of stakeholder scenarios, such methods introduce additional overhead since their formal notations have to be explained in each interaction between stakeholders and requirements engineers. Tangible prototypes as they are used in other disciplines such as design, on the other hand, allow designers to feasibly validate and iterate concepts and requirements with stakeholders. This thesis proposes a model-based approach for prototyping formal behavioral specifications of stakeholders who are involved in collaborative scenarios. By simulating and animating such specifications in a remote domain-specific visualization, stakeholders can experience and validate the scenarios captured so far, i.e., how other stakeholders act and react. This interactive scenario simulation is referred to as a model-based virtual prototype. Moreover, through observing how stakeholders interact with a virtual prototype of their collaborative scenarios, formal behavioral specifications can be automatically derived which complete the otherwise fragmented scenarios. This, in turn, enables requirements engineers to elicit and validate collaborative scenarios in individual stakeholder sessions - decoupled, since stakeholders can participate remotely and are not forced to be available for a joint session at the same time. This thesis discusses and evaluates the feasibility, understandability, and modifiability of model-based virtual prototypes. Similarly to how physical prototypes are perceived, the presented approach brings behavioral models closer to being tangible for stakeholders and, moreover, combines the advantages of joint stakeholder sessions and decoupled sessions.},
  language     = {en}
}

@book{BeyhlBlouinGieseetal.2016,
  author       = {Beyhl, Thomas and Blouin, Dominique and Giese, Holger and Lambers, Leen},
  title        = {On the operationalization of graph queries with generalized discrimination networks},
  number       = {106},
  publisher    = {Universit{\"a}tsverlag Potsdam},
  address      = {Potsdam},
  isbn         = {978-3-86956-372-5},
  issn         = {1613-5652},
  url          = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-96279},
  institution  = {Universit{\"a}t Potsdam},
  pages        = {33},
  year         = {2016},
  abstract     = {Graph queries have lately gained increased interest due to application areas such as social networks, biological networks, or model queries. For the relational database case the relational algebra and generalized discrimination networks have been studied to find appropriate decompositions into subqueries and ordering of these subqueries for query evaluation or incremental updates of query results. For graph database queries however there is no formal underpinning yet that allows us to find such suitable operationalizations. Consequently, we suggest a simple operational concept for the decomposition of arbitrary complex queries into simpler subqueries and the ordering of these subqueries in form of generalized discrimination networks for graph queries inspired by the relational case. The approach employs graph transformation rules for the nodes of the network and thus we can employ the underlying theory. We further show that the proposed generalized discrimination networks have the same expressive power as nested graph conditions.},
  language     = {en}
}

@book{BeyhlGiese2015,
  author       = {Beyhl, Thomas and Giese, Holger},
  title        = {Efficient and scalable graph view maintenance for deductive graph databases based on generalized discrimination networks},
  number       = {99},
  publisher    = {Universit{\"a}tsverlag Potsdam},
  address      = {Potsdam},
  isbn         = {978-3-86956-339-8},
  issn         = {1613-5652},
  url          = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-79535},
  institution  = {Universit{\"a}t Potsdam},
  pages        = {148},
  year         = {2015},
  abstract     = {Graph databases provide a natural way of storing and querying graph data. In contrast to relational databases, queries over graph databases enable to refer directly to the graph structure of such graph data. For example, graph pattern matching can be employed to formulate queries over graph data. However, as for relational databases running complex queries can be very time-consuming and ruin the interactivity with the database. One possible approach to deal with this performance issue is to employ database views that consist of pre-computed answers to common and often stated queries. But to ensure that database views yield consistent query results in comparison with the data from which they are derived, these database views must be updated before queries make use of these database views. Such a maintenance of database views must be performed efficiently, otherwise the effort to create and maintain views may not pay off in comparison to processing the queries directly on the data from which the database views are derived. At the time of writing, graph databases do not support database views and are limited to graph indexes that index nodes and edges of the graph data for fast query evaluation, but do not enable to maintain pre-computed answers of complex queries over graph data. Moreover, the maintenance of database views in graph databases becomes even more challenging when negation and recursion have to be supported as in deductive relational databases. In this technical report, we present an approach for the efficient and scalable incremental graph view maintenance for deductive graph databases. The main concept of our approach is a generalized discrimination network that enables to model nested graph conditions including negative application conditions and recursion, which specify the content of graph views derived from graph data stored by graph databases. The discrimination network enables to automatically derive generic maintenance rules using graph transformations for maintaining graph views in case the graph data from which the graph views are derived change. We evaluate our approach in terms of a case study using multiple data sets derived from open source projects.},
  language     = {en}
}

@article{Boissier2021,
  author       = {Boissier, Martin},
  title        = {Robust and budget-constrained encoding configurations for in-memory database systems},
  series       = {Proceedings of the VLDB Endowment},
  volume       = {15},
  journal      = {Proceedings of the VLDB Endowment},
  number       = {4},
  publisher    = {Association for Computing Machinery (ACM)},
  address      = {[New York]},
  issn         = {2150-8097},
  doi          = {10.14778/3503585.3503588},
  pages        = {780--793},
  year         = {2021},
  abstract     = {Data encoding has been applied to database systems for decades as it mitigates bandwidth bottlenecks and reduces storage requirements. But even in the presence of these advantages, most in-memory database systems use data encoding only conservatively as the negative impact on runtime performance can be severe. Real-world systems with large parts being infrequently accessed and cost efficiency constraints in cloud environments require solutions that automatically and efficiently select encoding techniques, including heavy-weight compression. In this paper, we introduce workload-driven approaches to automatically determine memory budget-constrained encoding configurations using greedy heuristics and linear programming. We show for TPC-H, TPC-DS, and the Join Order Benchmark that optimized encoding configurations can reduce the main memory footprint significantly without a loss in runtime performance over state-of-the-art dictionary encoding. To yield robust selections, we extend the linear programming-based approach to incorporate query runtime constraints and mitigate unexpected performance regressions.},
  language     = {en}
}

@article{BonifatiMiorNaumannetal.2022,
  author       = {Bonifati, Angela and Mior, Michael J. and Naumann, Felix and Noack, Nele Sina},
  title        = {How inclusive are we?},
  series       = {SIGMOD record / Association for Computing Machinery, Special Interest Group on Management of Data},
  volume       = {50},
  journal      = {SIGMOD record / Association for Computing Machinery, Special Interest Group on Management of Data},
  number       = {4},
  publisher    = {Association for Computing Machinery},
  address      = {New York},
  issn         = {0163-5808},
  doi          = {10.1145/3516431.3516438},
  pages        = {30--35},
  year         = {2022},
  abstract     = {ACM SIGMOD, VLDB and other database organizations have committed to fostering an inclusive and diverse community, as do many other scientific organizations. Recently, different measures have been taken to advance these goals, especially for underrepresented groups. One possible measure is double-blind reviewing, which aims to hide gender, ethnicity, and other properties of the authors. We report the preliminary results of a gender diversity analysis of publications of the database community across several peer-reviewed venues, and also compare women's authorship percentages in both single-blind and double-blind venues along the years. We also obtained a cross comparison of the obtained results in data management with other relevant areas in Computer Science.},
  language     = {en}
}

@article{BorchertMockTomczaketal.2021,
  author       = {Borchert, Florian and Mock, Andreas and Tomczak, Aurelie and H{\"u}gel, Jonas and Alkarkoukly, Samer and Knurr, Alexander and Volckmar, Anna-Lena and Stenzinger, Albrecht and Schirmacher, Peter and Debus, J{\"u}rgen and J{\"a}ger, Dirk and Longerich, Thomas and Fr{\"o}hling, Stefan and Eils, Roland and Bougatf, Nina and Sax, Ulrich and Schapranow, Matthieu-Patrick},
  title        = {Correction to: Knowledge bases and software support for variant interpretation in precision oncology},
  series       = {Briefings in bioinformatics},
  volume       = {22},
  journal      = {Briefings in bioinformatics},
  number       = {6},
  publisher    = {Oxford Univ. Press},
  address      = {Oxford},
  issn         = {1467-5463},
  doi          = {10.1093/bib/bbab246},
  pages        = {1},
  year         = {2021},
  language     = {en}
}

@comment{NOTE(review): the following entry originally reused the key BorchertMockTomczaketal.2021, which made it a repeated entry that BibTeX discards. It now carries the suffix b; the correction notice above keeps the unsuffixed key so existing citations resolve as before. Confirm which work those citations were meant to reference.}

@article{BorchertMockTomczaketal.2021b,
  author       = {Borchert, Florian and Mock, Andreas and Tomczak, Aurelie and H{\"u}gel, Jonas and Alkarkoukly, Samer and Knurr, Alexander and Volckmar, Anna-Lena and Stenzinger, Albrecht and Schirmacher, Peter and Debus, J{\"u}rgen and J{\"a}ger, Dirk and Longerich, Thomas and Fr{\"o}hling, Stefan and Eils, Roland and Bougatf, Nina and Sax, Ulrich and Schapranow, Matthieu-Patrick},
  title        = {Knowledge bases and software support for variant interpretation in precision oncology},
  series       = {Briefings in bioinformatics},
  volume       = {22},
  journal      = {Briefings in bioinformatics},
  number       = {6},
  publisher    = {Oxford Univ. Press},
  address      = {Oxford},
  issn         = {1467-5463},
  doi          = {10.1093/bib/bbab134},
  pages        = {17},
  year         = {2021},
  abstract     = {Precision oncology is a rapidly evolving interdisciplinary medical specialty. Comprehensive cancer panels are becoming increasingly available at pathology departments worldwide, creating the urgent need for scalable cancer variant annotation and molecularly informed treatment recommendations. A wealth of mainly academia-driven knowledge bases calls for software tools supporting the multi-step diagnostic process. We derive a comprehensive list of knowledge bases relevant for variant interpretation by a review of existing literature followed by a survey among medical experts from university hospitals in Germany. In addition, we review cancer variant interpretation tools, which integrate multiple knowledge bases. We categorize the knowledge bases along the diagnostic process in precision oncology and analyze programmatic access options as well as the integration of knowledge bases into software tools. The most commonly used knowledge bases provide good programmatic access options and have been integrated into a range of software tools. For the wider set of knowledge bases, access options vary across different parts of the diagnostic process. Programmatic access is limited for information regarding clinical classifications of variants and for therapy recommendations. The main issue for databases used for biological classification of pathogenic variants and pathway context information is the lack of standardized interfaces. There is no single cancer variant interpretation tool that integrates all identified knowledge bases. Specialized tools are available and need to be further developed for different steps in the diagnostic process.},
  language     = {en}
}

@book{BreestBoucheGrundetal.2006,
  author       = {Breest, Martin and Bouch{\'e}, Paul and Grund, Martin and Haubrock, S{\"o}ren and H{\"u}ttenrauch, Stefan and Kylau, Uwe and Ploskonos, Anna and Queck, Tobias and Schreiter, Torben},
  title        = {Fundamentals of Service-Oriented Engineering},
  publisher    = {Universit{\"a}tsverlag Potsdam},
  address      = {Potsdam},
  isbn         = {978-3-939469-35-3},
  url          = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-33801},
  institution  = {Universit{\"a}t Potsdam},
  pages        = {Getr. Z{\"a}hlung},
  year         = {2006},
  abstract     = {Since 2002, keywords like service-oriented engineering, service-oriented computing, and service-oriented architecture have been widely used in research, education, and enterprises. These and related terms are often misunderstood or used incorrectly. To correct these misunderstandings, a deeper knowledge of the concepts, the historical backgrounds, and an overview of service-oriented architectures is demanded and given in this paper.},
  language     = {en}
}

@comment{NOTE(review): the following inproceedings entry has no booktitle, which that entry type requires. The proceedings title is not present in this record and must be supplied from the original publication.}

@inproceedings{BynensVanLanduytTruyenetal.2010,
  author       = {Bynens, Maarten and Van Landuyt, Dimitri and Truyen, Eddy and Joosen, Wouter},
  title        = {Towards reusable aspects: the callback mismatch problem},
  url          = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-41347},
  year         = {2010},
  abstract     = {Because software development is increasingly expensive and time-consuming, software reuse gains importance. Aspect-oriented software development modularizes crosscutting concerns which enables their systematic reuse. Literature provides a number of AOP patterns and best practices for developing reusable aspects based on compelling examples for concerns like tracing, transactions and persistence. However, such best practices are lacking for systematically reusing invasive aspects. In this paper, we present the 'callback mismatch problem'. This problem arises in the context of abstraction mismatch, in which the aspect is required to issue a callback to the base application. As a consequence, the composition of invasive aspects is cumbersome to implement, difficult to maintain and impossible to reuse. We motivate this problem in a real-world example, show that it persists in the current state-of-the-art, and outline the need for advanced aspectual composition mechanisms to deal with this.},
  language     = {en}
}

@phdthesis{Boehm2013,
  author       = {B{\"o}hm, Christoph},
  title        = {Enriching the {Web of Data} with topics and links},
  url          = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-68624},
  school       = {Universit{\"a}t Potsdam},
  year         = {2013},
  abstract     = {This thesis presents novel ideas and research findings for the Web of Data - a global data space spanning many so-called Linked Open Data sources. Linked Open Data adheres to a set of simple principles to allow easy access and reuse for data published on the Web. Linked Open Data is by now an established concept and many (mostly academic) publishers adopted the principles building a powerful web of structured knowledge available to everybody. However, so far, Linked Open Data does not yet play a significant role among common web technologies that currently facilitate a high-standard Web experience. In this work, we thoroughly discuss the state-of-the-art for Linked Open Data and highlight several shortcomings - some of them we tackle in the main part of this work. First, we propose a novel type of data source meta-information, namely the topics of a dataset. This information could be published with dataset descriptions and support a variety of use cases, such as data source exploration and selection. For the topic retrieval, we present an approach coined Annotated Pattern Percolation (APP), which we evaluate with respect to topics extracted from Wikipedia portals. Second, we contribute to entity linking research by presenting an optimization model for joint entity linking, showing its hardness, and proposing three heuristics implemented in the LINked Data Alignment (LINDA) system. Our first solution can exploit multi-core machines, whereas the second and third approach are designed to run in a distributed shared-nothing environment. We discuss and evaluate the properties of our approaches leading to recommendations which algorithm to use in a specific scenario. The distributed algorithms are among the first of their kind, i.e., approaches for joint entity linking in a distributed fashion. Also, we illustrate that we can tackle the entity linking problem on the very large scale with data comprising more than 100 millions of entity representations from very many sources. Finally, we approach a sub-problem of entity linking, namely the alignment of concepts. We again target a method that looks at the data in its entirety and does not neglect existing relations. Also, this concept alignment method shall execute very fast to serve as a preprocessing for further computations. Our approach, called Holistic Concept Matching (HCM), achieves the required speed through grouping the input by comparing so-called knowledge representations. Within the groups, we perform complex similarity computations, relation conclusions, and detect semantic contradictions. The quality of our result is again evaluated on a large and heterogeneous dataset from the real Web. In summary, this work contributes a set of techniques for enhancing the current state of the Web of Data. All approaches have been tested on large and heterogeneous real-world input.},
  language     = {en}
}

@book{CalmezHesseSiegmundetal.2013,
  author       = {Calmez, Conrad and Hesse, Hubert and Siegmund, Benjamin and Stamm, Sebastian and Thomschke, Astrid and Hirschfeld, Robert and Ingalls, Dan and Lincke, Jens},
  title        = {Explorative authoring of {Active Web} content in a mobile environment},
  publisher    = {Universit{\"a}tsverlag Potsdam},
  address      = {Potsdam},
  isbn         = {978-3-86956-232-2},
  url          = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-64054},
  institution  = {Universit{\"a}t Potsdam},
  pages        = {132},
  year         = {2013},
  abstract     = {Developing rich Web applications can be a complex job - especially when it comes to mobile device support. Web-based environments such as Lively Webwerkstatt can help developers implement such applications by making the development process more direct and interactive. Further the process of developing software is collaborative which creates the need that the development environment offers collaboration facilities. This report describes extensions of the web-based development environment Lively Webwerkstatt such that it can be used in a mobile environment. The extensions are collaboration mechanisms, user interface adaptations but as well event processing and performance measuring on mobile devices.},
  language     = {en}
}

@article{ChauhanFriedrichRothenberger2020,
  author       = {Chauhan, Ankit and Friedrich, Tobias and Rothenberger, Ralf},
  title        = {Greed is good for deterministic scale-free networks},
  series       = {Algorithmica : an international journal in computer science},
  volume       = {82},
  journal      = {Algorithmica : an international journal in computer science},
  number       = {11},
  publisher    = {Springer},
  address      = {New York},
  issn         = {0178-4617},
  doi          = {10.1007/s00453-020-00729-z},
  pages        = {3338--3389},
  year         = {2020},
  abstract     = {Large real-world networks typically follow a power-law degree distribution. To study such networks, numerous random graph models have been proposed. However, real-world networks are not drawn at random. Therefore, Brach et al. (27th symposium on discrete algorithms (SODA), pp 1306-1325, 2016) introduced two natural deterministic conditions: (1) a power-law upper bound on the degree distribution (PLB-U) and (2) power-law neighborhoods, that is, the degree distribution of neighbors of each vertex is also upper bounded by a power law (PLB-N). They showed that many real-world networks satisfy both properties and exploit them to design faster algorithms for a number of classical graph problems. We complement their work by showing that some well-studied random graph models exhibit both of the mentioned PLB properties. PLB-U and PLB-N hold with high probability for Chung-Lu Random Graphs and Geometric Inhomogeneous Random Graphs and almost surely for Hyperbolic Random Graphs. As a consequence, all results of Brach et al. also hold with high probability or almost surely for those random graph classes. In the second part we study three classical NP-hard optimization problems on PLB networks. It is known that on general graphs with maximum degree Delta, a greedy algorithm, which chooses nodes in the order of their degree, only achieves a Omega (ln Delta)-approximation for Minimum Vertex Cover and Minimum Dominating Set, and a Omega(Delta)-approximation for Maximum Independent Set. We prove that the PLB-U property with beta>2 suffices for the greedy approach to achieve a constant-factor approximation for all three problems. We also show that these problems are APX-hard even if PLB-U, PLB-N, and an additional power-law lower bound on the degree distribution hold. Hence, a PTAS cannot be expected unless P = NP. Furthermore, we prove that all three problems are in MAX SNP if the PLB-U property holds.},
  language     = {en}
}

@article{ChujfiLaRocheMeinel2017,
  author       = {Chujfi-La-Roche, Salim and Meinel, Christoph},
  title        = {Matching cognitively sympathetic individual styles to develop collective intelligence in digital communities},
  series       = {AI \& society : the journal of human-centred systems and machine intelligence},
  volume       = {35},
  journal      = {AI \& society : the journal of human-centred systems and machine intelligence},
  number       = {1},
  publisher    = {Springer},
  address      = {New York},
  issn         = {0951-5666},
  doi          = {10.1007/s00146-017-0780-x},
  pages        = {5--15},
  year         = {2017},
  abstract     = {Creation, collection and retention of knowledge in digital communities is an activity that currently requires being explicitly targeted as a secure method of keeping intellectual capital growing in the digital era. In particular, we consider it relevant to analyze and evaluate the empathetic cognitive personalities and behaviors that individuals now have with the change from face-to-face communication (F2F) to computer-mediated communication (CMC) online. This document proposes a cyber-humanistic approach to enhance the traditional SECI knowledge management model. A cognitive perception is added to its cyclical process following design thinking interaction, exemplary for improvement of the method in which knowledge is continuously created, converted and shared. In building a cognitive-centered model, we specifically focus on the effective identification and response to cognitive stimulation of individuals, as they are the intellectual generators and multiplicators of knowledge in the online environment. Our target is to identify how geographically distributed-digital-organizations should align the individual's cognitive abilities to promote iteration and improve interaction as a reliable stimulant of collective intelligence. The new model focuses on analyzing the four different stages of knowledge processing, where individuals with sympathetic cognitive personalities can significantly boost knowledge creation in a virtual social system. For organizations, this means that multidisciplinary individuals can maximize their extensive potential, by externalizing their knowledge in the correct stage of the knowledge creation process, and by collaborating with their appropriate sympathetically cognitive remote peers.},
  language     = {en}
}

@phdthesis{Dawoud2013,
  author       = {Dawoud, Wesam},
  title        = {Scalability and performance management of internet applications in the cloud},
  url          = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-68187},
  school       = {Universit{\"a}t Potsdam},
  year         = {2013},
  abstract     = {Cloud computing is a model for enabling on-demand access to a shared pool of computing resources. With virtually limitless on-demand resources, a cloud environment enables the hosted Internet application to quickly cope when there is an increase in the workload. However, the overhead of provisioning resources exposes the Internet application to periods of under-provisioning and performance degradation. Moreover, the performance interference, due to the consolidation in the cloud environment, complicates the performance management of the Internet applications. In this dissertation, we propose two approaches to mitigate the impact of the resources provisioning overhead. The first approach employs control theory to scale resources vertically and cope fast with workload. This approach assumes that the provider has knowledge and control over the platform running in the virtual machines (VMs), which limits it to Platform as a Service (PaaS) and Software as a Service (SaaS) providers. The second approach is a customer-side one that deals with the horizontal scalability in an Infrastructure as a Service (IaaS) model. It addresses the trade-off problem between cost and performance with a multi-goal optimization solution. This approach finds the scale thresholds that achieve the highest performance with the lowest increase in the cost. Moreover, the second approach employs a proposed time series forecasting algorithm to scale the application proactively and avoid under-utilization periods. Furthermore, to mitigate the interference impact on the Internet application performance, we developed a system which finds and eliminates the VMs suffering from performance interference. The developed system is a light-weight solution which does not imply provider involvement. To evaluate our approaches and the designed algorithms at large-scale level, we developed a simulator called (ScaleSim). In the simulator, we implemented scalability components acting as the scalability components of Amazon EC2. The current scalability implementation in Amazon EC2 is used as a reference point for evaluating the improvement in the scalable application performance. ScaleSim is fed with realistic models of the RUBiS benchmark extracted from the real environment. The workload is generated from the access logs of the 1998 world cup website. The results show that optimizing the scalability thresholds and adopting proactive scalability can mitigate 88\% of the resources provisioning overhead impact with only a 9\% increase in the cost.},
  language     = {en}
}

@article{DeFreitasJohnsonGoldenetal.2021, author = {De Freitas, Jessica K. and Johnson, Kipp W. and Golden, Eddye and Nadkarni, Girish N. and Dudley, Joel T. and B{\"o}ttinger, Erwin and Glicksberg, Benjamin S. 
and Miotto, Riccardo}, title = {Phe2vec}, series = {Patterns}, volume = {2}, journal = {Patterns}, number = {9}, publisher = {Elsevier}, address = {Amsterdam}, issn = {2666-3899}, doi = {10.1016/j.patter.2021.100337}, pages = {9}, year = {2021}, abstract = {Robust phenotyping of patients from electronic health records (EHRs) at scale is a challenge in clinical informatics. Here, we introduce Phe2vec, an automated framework for disease phenotyping from EHRs based on unsupervised learning and assess its effectiveness against standard rule-based algorithms from Phenotype KnowledgeBase (PheKB). Phe2vec is based on pre-computing embeddings of medical concepts and patients' clinical history. Disease phenotypes are then derived from a seed concept and its neighbors in the embedding space. Patients are linked to a disease if their embedded representation is close to the disease phenotype. Comparing Phe2vec and PheKB cohorts head-to-head using chart review, Phe2vec performed on par or better in nine out of ten diseases. Differently from other approaches, it can scale to any condition and was validated against widely adopted expert-based standards. Phe2vec aims to optimize clinical informatics research by augmenting current frameworks to characterize patients by condition and derive reliable disease cohorts.}, language = {en} } @article{DischerRichterDoellner2016, author = {Discher, S{\"o}ren and Richter, Rico and D{\"o}llner, J{\"u}rgen Roland Friedrich}, title = {Interactive and View-Dependent See-Through Lenses for Massive 3D Point Clouds}, series = {Advances in 3D Geoinformation}, journal = {Advances in 3D Geoinformation}, publisher = {Springer}, address = {Cham}, isbn = {978-3-319-25691-7}, issn = {1863-2246}, doi = {10.1007/978-3-319-25691-7_3}, pages = {49 -- 62}, year = {2016}, abstract = {3D point clouds are a digital representation of our world and used in a variety of applications. 
They are captured with LiDAR or derived by image-matching approaches to get surface information of objects, e.g., indoor scenes, buildings, infrastructures, cities, and landscapes. We present novel interaction and visualization techniques for heterogeneous, time variant, and semantically rich 3D point clouds. Interactive and view-dependent see-through lenses are introduced as exploration tools to enhance recognition of objects, semantics, and temporal changes within 3D point cloud depictions. We also develop filtering and highlighting techniques that are used to dissolve occlusion to give context-specific insights. All techniques can be combined with an out-of-core real-time rendering system for massive 3D point clouds. We have evaluated the presented approach with 3D point clouds from different application domains. The results show the usability and how different visualization and exploration tasks can be improved for a variety of domain-specific applications.}, language = {en} }