@phdthesis{Abedjan2014, author = {Abedjan, Ziawasch}, title = {Improving RDF data with data mining}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-71334}, school = {Universit{\"a}t Potsdam}, year = {2014}, abstract = {Linked Open Data (LOD) comprises very many and often large public data sets and knowledge bases. Those datasets are mostly presented in the RDF triple structure of subject, predicate, and object, where each triple represents a statement or fact. Unfortunately, the heterogeneity of available open data requires significant integration steps before it can be used in applications. Meta information, such as ontological definitions and exact range definitions of predicates, is desirable and ideally provided by an ontology. However, in the context of LOD, ontologies are often incomplete or simply not available. Thus, it is useful to automatically generate meta information, such as ontological dependencies, range definitions, and topical classifications. Association rule mining, which was originally applied for sales analysis on transactional databases, is a promising and novel technique to explore such data. We designed an adaptation of this technique for mining RDF data and introduce the concept of "mining configurations", which allows us to mine RDF data sets in various ways. Different configurations enable us to identify schema and value dependencies that in combination result in interesting use cases. To this end, we present rule-based approaches for auto-completion, data enrichment, ontology improvement, and query relaxation. Auto-completion remedies the problem of inconsistent ontology usage, providing an editing user with a sorted list of commonly used predicates. A combination of different configurations extends this approach to create completely new facts for a knowledge base. We present two approaches for fact generation, a user-based approach where a user selects the entity to be amended with new facts and a data-driven approach where an algorithm discovers entities that have to be amended with missing facts. As knowledge bases constantly grow and evolve, another approach to improve the usage of RDF data is to improve existing ontologies. Here, we present an association rule based approach to reconcile ontology and data. Interlacing different mining configurations, we derive an algorithm to discover synonymously used predicates. Those predicates can be used to expand query results and to support users during query formulation. We provide a wide range of experiments on real-world datasets for each use case. The experiments and evaluations show the added value of association rule mining for the integration and usability of RDF data and confirm the appropriateness of our mining configuration methodology.}, language = {en} } @book{AbedjanNaumann2011, author = {Abedjan, Ziawasch and Naumann, Felix}, title = {Advancing the discovery of unique column combinations}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-148-6}, issn = {1613-5652}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-53564}, publisher = {Universit{\"a}t Potsdam}, pages = {25}, year = {2011}, abstract = {Unique column combinations of a relational database table are sets of columns that contain only unique values. Discovering such combinations is a fundamental research problem and has many different data management and knowledge discovery applications.
Existing discovery algorithms are either brute force or have a high memory load and can thus be applied only to small datasets or samples. In this paper, the well-known GORDIAN algorithm and "Apriori-based" algorithms are compared and analyzed for further optimization. We greatly improve the Apriori algorithms through efficient candidate generation and statistics-based pruning methods. A hybrid solution HCAGORDIAN combines the advantages of GORDIAN and our new algorithm HCA, and it significantly outperforms all previous work in many situations.}, language = {en} } @book{AlbrechtNaumann2012, author = {Albrecht, Alexander and Naumann, Felix}, title = {Understanding cryptic schemata in large extract-transform-load systems}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-201-8}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-61257}, publisher = {Universit{\"a}t Potsdam}, pages = {19}, year = {2012}, abstract = {Extract-Transform-Load (ETL) tools are used for the creation, maintenance, and evolution of data warehouses, data marts, and operational data stores. ETL workflows populate those systems with data from various data sources by specifying and executing a DAG of transformations. Over time, hundreds of individual workflows evolve as new sources and new requirements are integrated into the system. The maintenance and evolution of large-scale ETL systems requires much time and manual effort. A key problem is to understand the meaning of unfamiliar attribute labels in source and target databases and ETL transformations. Hard-to-understand attribute labels lead to frustration and time spent to develop and understand ETL workflows. We present a schema decryption technique to support ETL developers in understanding cryptic schemata of sources, targets, and ETL transformations. For a given ETL system, our recommender-like approach leverages the large number of mapped attribute labels in existing ETL workflows to produce good and meaningful decryptions. In this way we are able to decrypt attribute labels consisting of a number of unfamiliar few-letter abbreviations, such as UNP_PEN_INT, which we can decrypt to UNPAID_PENALTY_INTEREST. We evaluate our schema decryption approach on three real-world repositories of ETL workflows and show that our approach is able to suggest high-quality decryptions for cryptic attribute labels in a given schema.}, language = {en} } @book{AlnemrPolyvyanyyAbuJarouretal.2010, author = {Alnemr, Rehab and Polyvyanyy, Artem and AbuJarour, Mohammed and Appeltauer, Malte and Hildebrandt, Dieter and Thomas, Ivonne and Overdick, Hagen and Sch{\"o}bel, Michael and Uflacker, Matthias and Kluth, Stephan and Menzel, Michael and Schmidt, Alexander and Hagedorn, Benjamin and Pascalau, Emilian and Perscheid, Michael and Vogel, Thomas and Hentschel, Uwe and Feinbube, Frank and Kowark, Thomas and Tr{\"u}mper, Jonas and Vogel, Tobias and Becker, Basil}, title = {Proceedings of the 4th Ph.D. Retreat of the HPI Research School on Service-oriented Systems Engineering}, editor = {Meinel, Christoph and Plattner, Hasso and D{\"o}llner, J{\"u}rgen Roland Friedrich and Weske, Mathias and Polze, Andreas and Hirschfeld, Robert and Naumann, Felix and Giese, Holger}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-036-6}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-40838}, publisher = {Universit{\"a}t Potsdam}, pages = {Getr.
Z{\"a}hlung}, year = {2010}, language = {en} } @book{AppeltauerHirschfeld2012, author = {Appeltauer, Malte and Hirschfeld, Robert}, title = {The JCop language specification : Version 1.0, April 2012}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-193-6}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-60208}, publisher = {Universit{\"a}t Potsdam}, pages = {iv, 48}, year = {2012}, abstract = {Program behavior that relies on contextual information, such as physical location or network accessibility, is common in today's applications, yet its representation is not sufficiently supported by programming languages. With context-oriented programming (COP), such context-dependent behavioral variations can be explicitly modularized and dynamically activated. In general, COP could be used to manage any context-specific behavior. However, its contemporary realizations limit the control of dynamic adaptation. This, in turn, limits the interaction of COP's adaptation mechanisms with widely used architectures, such as event-based, mobile, and distributed programming. The JCop programming language extends Java with language constructs for context-oriented programming and additionally provides a domain-specific aspect language for declarative control over runtime adaptations. As a result, these redesigned implementations are more concise and better modularized than their counterparts using plain COP. JCop's main features have been described in our previous publications. However, a complete language specification has not been presented so far. This report presents the entire JCop language including the syntax and semantics of its new language constructs.}, language = {en} } @phdthesis{Awad2010, author = {Awad, Ahmed Mahmoud Hany Aly}, title = {A compliance management framework for business process models}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-49222}, school = {Universit{\"a}t Potsdam}, year = {2010}, abstract = {Companies develop process models to explicitly describe their business operations. In the same time, business operations, business processes, must adhere to various types of compliance requirements. Regulations, e.g., Sarbanes Oxley Act of 2002, internal policies, best practices are just a few sources of compliance requirements. In some cases, non-adherence to compliance requirements makes the organization subject to legal punishment. In other cases, non-adherence to compliance leads to loss of competitive advantage and thus loss of market share. Unlike the classical domain-independent behavioral correctness of business processes, compliance requirements are domain-specific. Moreover, compliance requirements change over time. New requirements might appear due to change in laws and adoption of new policies. Compliance requirements are offered or enforced by different entities that have different objectives behind these requirements. Finally, compliance requirements might affect different aspects of business processes, e.g., control flow and data flow. As a result, it is infeasible to hard-code compliance checks in tools. Rather, a repeatable process of modeling compliance rules and checking them against business processes automatically is needed. This thesis provides a formal approach to support process design-time compliance checking. Using visual patterns, it is possible to model compliance requirements concerning control flow, data flow and conditional flow rules. Each pattern is mapped into a temporal logic formula. 
The thesis addresses the problem of consistency checking among various compliance requirements, as they might stem from divergent sources. Also, the thesis contributes an approach to automatically check compliance requirements against process models using model checking. We show that extra domain knowledge, other than that expressed in compliance rules, is needed to reach correct decisions. In case of violations, we are able to provide useful feedback to the user. The feedback is in the form of parts of the process model whose execution causes the violation. In some cases, our approach is capable of providing automated remedy of the violation.}, language = {en} } @phdthesis{Baier2015, author = {Baier, Thomas}, title = {Matching events and activities}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-84548}, school = {Universit{\"a}t Potsdam}, pages = {xxii, 213}, year = {2015}, abstract = {Nowadays, business processes are increasingly supported by IT services that produce massive amounts of event data during process execution. Aiming at a better process understanding and improvement, this event data can be used to analyze processes using process mining techniques. Process models can be automatically discovered and the execution can be checked for conformance to specified behavior. Moreover, existing process models can be enhanced and annotated with valuable information, for example for performance analysis. While the maturity of process mining algorithms is increasing and more tools are entering the market, process mining projects still face the problem of different levels of abstraction when comparing events with modeled business activities. Mapping the recorded events to activities of a given process model is essential for conformance checking, annotation and understanding of process discovery results. Current approaches try to abstract from events in an automated way that does not capture the required domain knowledge to fit business activities. Such techniques can be a good way to quickly reduce complexity in process discovery. Yet, they fail to enable techniques like conformance checking or model annotation, and potentially create misleading process discovery results by not using the known business terminology. In this thesis, we develop approaches that abstract an event log to the same level that is needed by the business. Typically, this abstraction level is defined by a given process model. Thus, the goal of this thesis is to match events from an event log to activities in a given process model. To accomplish this goal, behavioral and linguistic aspects of process models and event logs as well as domain knowledge captured in existing process documentation are taken into account to build semi-automatic matching approaches. The approaches establish a pre-processing step for every available process mining technique that produces or annotates a process model, thereby reducing the manual effort for process analysts. While each of the presented approaches can be used in isolation, we also introduce a general framework for the integration of different matching approaches. The approaches have been evaluated in case studies with industry and using a large industry process model collection and simulated event logs.
The evaluation demonstrates the effectiveness and efficiency of the approaches and their robustness towards nonconforming execution logs.}, language = {en} } @article{BarkowskyGiese2020, author = {Barkowsky, Matthias and Giese, Holger}, title = {Hybrid search plan generation for generalized graph pattern matching}, series = {Journal of logical and algebraic methods in programming}, volume = {114}, journal = {Journal of logical and algebraic methods in programming}, publisher = {Elsevier}, address = {New York}, issn = {2352-2208}, doi = {10.1016/j.jlamp.2020.100563}, pages = {29}, year = {2020}, abstract = {In recent years, the increased interest in application areas such as social networks has resulted in a rising popularity of graph-based approaches for storing and processing large amounts of interconnected data. To extract useful information from the growing network structures, efficient querying techniques are required. In this paper, we propose an approach for graph pattern matching that allows a uniform handling of arbitrary constraints over the query vertices. Our technique builds on a previously introduced matching algorithm, which takes concrete host graph information into account to dynamically adapt the employed search plan during query execution. The dynamic algorithm is combined with an existing static approach for search plan generation, resulting in a hybrid technique which we further extend by a more sophisticated handling of filtering effects caused by constraint checks. We evaluate the presented concepts empirically based on an implementation for our graph pattern matching tool, the Story Diagram Interpreter, with queries and data provided by the LDBC Social Network Benchmark. Our results suggest that the hybrid technique may improve search efficiency in several cases, and rarely reduces efficiency.}, language = {en} } @phdthesis{Bauckmann2013, author = {Bauckmann, Jana}, title = {Dependency discovery for data integration}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-66645}, school = {Universit{\"a}t Potsdam}, year = {2013}, abstract = {Data integration aims to combine data from different sources and to provide users with a unified view on these data. This task is as challenging as it is valuable. In this thesis we propose algorithms for dependency discovery to provide necessary information for data integration. We focus on inclusion dependencies (INDs) in general and a special form named conditional inclusion dependencies (CINDs): (i) INDs enable the discovery of structure in a given schema. (ii) INDs and CINDs support the discovery of cross-references or links between schemas. An IND "A in B" simply states that all values of attribute A are included in the set of values of attribute B. We propose an algorithm that discovers all inclusion dependencies in a relational data source. The challenge of this task is the complexity of testing all attribute pairs and further of comparing all of each attribute pair's values. The complexity of existing approaches depends on the number of attribute pairs, while ours depends only on the number of attributes. Thus, our algorithm enables profiling entirely unknown data sources with large schemas by discovering all INDs. Further, we provide an approach to extract foreign keys from the identified INDs.
We extend our IND discovery algorithm to also find three special types of INDs: (i) Composite INDs, such as "AB in CD", (ii) approximate INDs that allow a certain number of values of A to not be included in B, and (iii) prefix and suffix INDs that represent special cross-references between schemas. Conditional inclusion dependencies are inclusion dependencies with a limited scope defined by conditions over several attributes. Only the matching part of the instance must adhere to the dependency. We generalize the definition of CINDs, distinguishing covering and completeness conditions, and define quality measures for conditions. We propose efficient algorithms that identify covering and completeness conditions conforming to given quality thresholds. The challenge for this task is twofold: (i) Which (and how many) attributes should be used for the conditions? (ii) Which attribute values should be chosen for the conditions? Previous approaches rely on pre-selected condition attributes or can only discover conditions applying to quality thresholds of 100\%. Our approaches were motivated by two application domains: data integration in the life sciences and link discovery for linked open data. We show the efficiency and the benefits of our approaches for use cases in these domains.}, language = {en} } @book{BauckmannAbedjanLeseretal.2012, author = {Bauckmann, Jana and Abedjan, Ziawasch and Leser, Ulf and M{\"u}ller, Heiko and Naumann, Felix}, title = {Covering or complete? : Discovering conditional inclusion dependencies}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-212-4}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-62089}, publisher = {Universit{\"a}t Potsdam}, pages = {34}, year = {2012}, abstract = {Data dependencies, or integrity constraints, are used to improve the quality of a database schema, to optimize queries, and to ensure consistency in a database. In recent years, conditional dependencies have been introduced to analyze and improve data quality. In short, a conditional dependency is a dependency with a limited scope defined by conditions over one or more attributes. Only the matching part of the instance must adhere to the dependency. In this paper we focus on conditional inclusion dependencies (CINDs). We generalize the definition of CINDs, distinguishing covering and completeness conditions. We present a new use case for such CINDs showing their value for solving complex data quality tasks. Further, we define quality measures for conditions inspired by precision and recall. We propose efficient algorithms that identify covering and completeness conditions conforming to given quality thresholds. Our algorithms choose not only the condition values but also the condition attributes automatically. Finally, we show that our approach efficiently provides meaningful and helpful results for our use case.}, language = {en} } @book{BauckmannLeserNaumann2010, author = {Bauckmann, Jana and Leser, Ulf and Naumann, Felix}, title = {Efficient and exact computation of inclusion dependencies for data integration}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-048-9}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-41396}, publisher = {Universit{\"a}t Potsdam}, pages = {36}, year = {2010}, abstract = {Data obtained from foreign data sources often come with only superficial structural information, such as relation names and attribute names.
Other types of metadata that are important for effective integration and meaningful querying of such data sets are missing. In particular, relationships among attributes, such as foreign keys, are crucial metadata for understanding the structure of an unknown database. The discovery of such relationships is difficult, because in principle for each pair of attributes in the database each pair of data values must be compared. A precondition for a foreign key is an inclusion dependency (IND) between the key and the foreign key attributes. We present Spider, an algorithm that efficiently finds all INDs in a given relational database. It leverages the sorting facilities of the DBMS but performs the actual comparisons outside of the database to save computation. Spider analyzes very large databases up to an order of magnitude faster than previous approaches. We also evaluate in detail the effectiveness of several heuristics to reduce the number of necessary comparisons. Furthermore, we generalize Spider to find composite INDs covering multiple attributes, and partial INDs, which are true INDs for all but a certain number of values. This last type is particularly relevant when integrating dirty data as is often the case in the life sciences domain - our driving motivation.}, language = {en} } @phdthesis{Becker2013, author = {Becker, Basil}, title = {Architectural modelling and verification of open service-oriented systems of systems}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-70158}, school = {Universit{\"a}t Potsdam}, year = {2013}, abstract = {Systems of Systems (SoS) have received a lot of attention recently. In this thesis we will focus on SoS that are built atop the techniques of Service-Oriented Architectures and thus combine the benefits and challenges of both paradigms. For this thesis we will understand SoS as ensembles of single autonomous systems that are integrated into a larger system, the SoS. The interesting fact about these systems is that the previously isolated systems are still maintained, improved and developed on their own. Structural dynamics is an issue in SoS, as at every point in time systems can join and leave the ensemble. This and the fact that the cooperation among the constituent systems is not necessarily observable means that we will consider these systems as open systems. Of course, the system has a clear boundary at each point in time, but this can only be identified by halting the complete SoS. However, halting a system of that size is practically impossible. Often SoS are combinations of software systems and physical systems. Hence, a failure in the software system can have a serious physical impact, which easily makes an SoS of this kind a safety-critical system. The contribution of this thesis is a modelling approach that extends OMG's SoaML and basically relies on collaborations and roles as an abstraction layer above the components. This will allow us to describe SoS at an architectural level. We will also give a formal semantics for our modelling approach which employs hybrid graph-transformation systems. The modelling approach is accompanied by a modular verification scheme that will be able to cope with the complexity constraints implied by the SoS' structural dynamics and size. Building such autonomous systems as SoS without evolution at the architectural level --- i.e., adding and removing components and services --- is inadequate.
Therefore, our approach directly supports the modelling and verification of evolution.}, language = {en} } @book{BeckerGiese2012, author = {Becker, Basil and Giese, Holger}, title = {Cyber-physical systems with dynamic structure : towards modeling and verification of inductive invariants}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-217-9}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-62437}, publisher = {Universit{\"a}t Potsdam}, pages = {iv, 27}, year = {2012}, abstract = {Cyber-physical systems achieve sophisticated system behavior by exploring the tight interconnection of physical coupling present in classical engineering systems and information technology based coupling. A particularly challenging case are systems where these cyber-physical systems are formed ad hoc according to the specific local topology, the available networking capabilities, and the goals and constraints of the subsystems captured by the information processing part. In this paper we present a formalism that permits modeling the sketched class of cyber-physical systems. The ad hoc formation of tightly coupled subsystems of arbitrary size is specified using a UML-based graph transformation system approach. Differential equations are employed to define the resulting tightly coupled behavior. Together, both form hybrid graph transformation systems where the graph transformation rules define the discrete steps where the topology or modes may change, while the differential equations capture the continuous behavior in between such discrete changes. In addition, we demonstrate that automated analysis techniques known for timed graph transformation systems for inductive invariants can be extended to also cover the hybrid case for an expressive case of hybrid models where the formed tightly coupled subsystems are restricted to smaller local networks.}, language = {en} } @book{BeckerGieseNeumann2009, author = {Becker, Basil and Giese, Holger and Neumann, Stefan}, title = {Correct dynamic service-oriented architectures : modeling and compositional verification with dynamic collaborations}, organization = {System Analysis and Modeling Group}, isbn = {978-3-940793-91-1}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-30473}, publisher = {Universit{\"a}t Potsdam}, year = {2009}, abstract = {Service-oriented modeling employs collaborations to capture the coordination of multiple roles in the form of service contracts. In the case of dynamic collaborations, the roles may join and leave the collaboration at runtime and therefore complex structural dynamics can result, which makes it very hard to ensure their correct and safe operation. We present in this paper our approach for modeling and verifying such dynamic collaborations. Modeling is supported using a well-defined subset of UML class diagrams, behavioral rules for the structural dynamics, and UML state machines for the role behavior. To be also able to verify the resulting service-oriented systems, we extended our former results for the automated verification of systems with structural dynamics [7, 8] and developed a compositional reasoning scheme, which enables the reuse of verification results.
We outline our approach using the example of autonomous vehicles that use such dynamic collaborations via ad-hoc networking to coordinate and optimize their joint behavior.}, language = {en} } @phdthesis{Berg2013, author = {Berg, Gregor}, title = {Virtual prototypes for the model-based elicitation and validation of collaborative scenarios}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-69729}, school = {Universit{\"a}t Potsdam}, year = {2013}, abstract = {Requirements engineers have to elicit, document, and validate how stakeholders act and interact to achieve their common goals in collaborative scenarios. Only after gathering all information concerning who interacts with whom to do what and why, can a software system be designed and realized which supports the stakeholders to do their work. To capture and structure requirements of different (groups of) stakeholders, scenario-based approaches have been widely used and investigated. Still, the elicitation and validation of requirements covering collaborative scenarios remains complicated, since the required information is highly intertwined, fragmented, and distributed over several stakeholders. Hence, it can only be elicited and validated collaboratively. In times of globally distributed companies, scheduling and conducting workshops with groups of stakeholders is usually not feasible due to budget and time constraints. Talking to individual stakeholders, on the other hand, is feasible but leads to fragmented and incomplete stakeholder scenarios. Going back and forth between different individual stakeholders to resolve this fragmentation and explore uncovered alternatives is an error-prone, time-consuming, and expensive task for the requirements engineers. While formal modeling methods can be employed to automatically check and ensure consistency of stakeholder scenarios, such methods introduce additional overhead since their formal notations have to be explained in each interaction between stakeholders and requirements engineers. Tangible prototypes as they are used in other disciplines such as design, on the other hand, allow designers to feasibly validate and iterate concepts and requirements with stakeholders. This thesis proposes a model-based approach for prototyping formal behavioral specifications of stakeholders who are involved in collaborative scenarios. By simulating and animating such specifications in a remote domain-specific visualization, stakeholders can experience and validate the scenarios captured so far, i.e., how other stakeholders act and react. This interactive scenario simulation is referred to as a model-based virtual prototype. Moreover, through observing how stakeholders interact with a virtual prototype of their collaborative scenarios, formal behavioral specifications can be automatically derived which complete the otherwise fragmented scenarios. This, in turn, enables requirements engineers to elicit and validate collaborative scenarios in individual stakeholder sessions - decoupled, since stakeholders can participate remotely and are not forced to be available for a joint session at the same time. This thesis discusses and evaluates the feasibility, understandability, and modifiability of model-based virtual prototypes. 
Similarly to how physical prototypes are perceived, the presented approach brings behavioral models closer to being tangible for stakeholders and, moreover, combines the advantages of joint stakeholder sessions and decoupled sessions.}, language = {en} } @book{BeyhlBlouinGieseetal.2016, author = {Beyhl, Thomas and Blouin, Dominique and Giese, Holger and Lambers, Leen}, title = {On the operationalization of graph queries with generalized discrimination networks}, number = {106}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-372-5}, issn = {1613-5652}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-96279}, publisher = {Universit{\"a}t Potsdam}, pages = {33}, year = {2016}, abstract = {Graph queries have lately gained increased interest due to application areas such as social networks, biological networks, or model queries. For the relational database case the relational algebra and generalized discrimination networks have been studied to find appropriate decompositions into subqueries and ordering of these subqueries for query evaluation or incremental updates of query results. For graph database queries however there is no formal underpinning yet that allows us to find such suitable operationalizations. Consequently, we suggest a simple operational concept for the decomposition of arbitrary complex queries into simpler subqueries and the ordering of these subqueries in form of generalized discrimination networks for graph queries inspired by the relational case. The approach employs graph transformation rules for the nodes of the network and thus we can employ the underlying theory. We further show that the proposed generalized discrimination networks have the same expressive power as nested graph conditions.}, language = {en} } @book{BeyhlGiese2015, author = {Beyhl, Thomas and Giese, Holger}, title = {Efficient and scalable graph view maintenance for deductive graph databases based on generalized discrimination networks}, number = {99}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-339-8}, issn = {1613-5652}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-79535}, publisher = {Universit{\"a}t Potsdam}, pages = {148}, year = {2015}, abstract = {Graph databases provide a natural way of storing and querying graph data. In contrast to relational databases, queries over graph databases enable to refer directly to the graph structure of such graph data. For example, graph pattern matching can be employed to formulate queries over graph data. However, as for relational databases running complex queries can be very time-consuming and ruin the interactivity with the database. One possible approach to deal with this performance issue is to employ database views that consist of pre-computed answers to common and often stated queries. But to ensure that database views yield consistent query results in comparison with the data from which they are derived, these database views must be updated before queries make use of these database views. Such a maintenance of database views must be performed efficiently, otherwise the effort to create and maintain views may not pay off in comparison to processing the queries directly on the data from which the database views are derived. 
At the time of writing, graph databases do not support database views and are limited to graph indexes that index nodes and edges of the graph data for fast query evaluation, but do not enable maintaining pre-computed answers to complex queries over graph data. Moreover, the maintenance of database views in graph databases becomes even more challenging when negation and recursion have to be supported as in deductive relational databases. In this technical report, we present an approach for efficient and scalable incremental graph view maintenance for deductive graph databases. The main concept of our approach is a generalized discrimination network that enables modeling nested graph conditions including negative application conditions and recursion, which specify the content of graph views derived from graph data stored by graph databases. The discrimination network enables automatically deriving generic maintenance rules using graph transformations for maintaining graph views in case the graph data from which the graph views are derived change. We evaluate our approach in terms of a case study using multiple data sets derived from open source projects.}, language = {en} } @article{Boissier2021, author = {Boissier, Martin}, title = {Robust and budget-constrained encoding configurations for in-memory database systems}, series = {Proceedings of the VLDB Endowment}, volume = {15}, journal = {Proceedings of the VLDB Endowment}, number = {4}, publisher = {Association for Computing Machinery (ACM)}, address = {[New York]}, issn = {2150-8097}, doi = {10.14778/3503585.3503588}, pages = {780 -- 793}, year = {2021}, abstract = {Data encoding has been applied to database systems for decades as it mitigates bandwidth bottlenecks and reduces storage requirements. But even in the presence of these advantages, most in-memory database systems use data encoding only conservatively as the negative impact on runtime performance can be severe. Real-world systems with large parts being infrequently accessed and cost efficiency constraints in cloud environments require solutions that automatically and efficiently select encoding techniques, including heavy-weight compression. In this paper, we introduce workload-driven approaches to automatically determine memory budget-constrained encoding configurations using greedy heuristics and linear programming. We show for TPC-H, TPC-DS, and the Join Order Benchmark that optimized encoding configurations can reduce the main memory footprint significantly without a loss in runtime performance over state-of-the-art dictionary encoding. To yield robust selections, we extend the linear programming-based approach to incorporate query runtime constraints and mitigate unexpected performance regressions.}, language = {en} } @article{BonifatiMiorNaumannetal.2022, author = {Bonifati, Angela and Mior, Michael J. and Naumann, Felix and Noack, Nele Sina}, title = {How inclusive are we?}, series = {SIGMOD record / Association for Computing Machinery, Special Interest Group on Management of Data}, volume = {50}, journal = {SIGMOD record / Association for Computing Machinery, Special Interest Group on Management of Data}, number = {4}, publisher = {Association for Computing Machinery}, address = {New York}, issn = {0163-5808}, doi = {10.1145/3516431.3516438}, pages = {30 -- 35}, year = {2022}, abstract = {ACM SIGMOD, VLDB and other database organizations have committed to fostering an inclusive and diverse community, as do many other scientific organizations.
Recently, different measures have been taken to advance these goals, especially for underrepresented groups. One possible measure is double-blind reviewing, which aims to hide gender, ethnicity, and other properties of the authors.
We report the preliminary results of a gender diversity analysis of publications of the database community across several peer-reviewed venues, and also compare women's authorship percentages in both single-blind and double-blind venues over the years. We also provide a cross-comparison of the results in data management with other relevant areas in Computer Science.}, language = {en} } @article{BorchertMockTomczaketal.2021, author = {Borchert, Florian and Mock, Andreas and Tomczak, Aurelie and H{\"u}gel, Jonas and Alkarkoukly, Samer and Knurr, Alexander and Volckmar, Anna-Lena and Stenzinger, Albrecht and Schirmacher, Peter and Debus, J{\"u}rgen and J{\"a}ger, Dirk and Longerich, Thomas and Fr{\"o}hling, Stefan and Eils, Roland and Bougatf, Nina and Sax, Ulrich and Schapranow, Matthieu-Patrick}, title = {Correction to: Knowledge bases and software support for variant interpretation in precision oncology}, series = {Briefings in bioinformatics}, volume = {22}, journal = {Briefings in bioinformatics}, number = {6}, publisher = {Oxford Univ. Press}, address = {Oxford}, issn = {1467-5463}, doi = {10.1093/bib/bbab246}, pages = {1}, year = {2021}, language = {en} } @article{BorchertMockTomczaketal.2021, author = {Borchert, Florian and Mock, Andreas and Tomczak, Aurelie and H{\"u}gel, Jonas and Alkarkoukly, Samer and Knurr, Alexander and Volckmar, Anna-Lena and Stenzinger, Albrecht and Schirmacher, Peter and Debus, J{\"u}rgen and J{\"a}ger, Dirk and Longerich, Thomas and Fr{\"o}hling, Stefan and Eils, Roland and Bougatf, Nina and Sax, Ulrich and Schapranow, Matthieu-Patrick}, title = {Knowledge bases and software support for variant interpretation in precision oncology}, series = {Briefings in bioinformatics}, volume = {22}, journal = {Briefings in bioinformatics}, number = {6}, publisher = {Oxford Univ. Press}, address = {Oxford}, issn = {1467-5463}, doi = {10.1093/bib/bbab134}, pages = {17}, year = {2021}, abstract = {Precision oncology is a rapidly evolving interdisciplinary medical specialty. Comprehensive cancer panels are becoming increasingly available at pathology departments worldwide, creating the urgent need for scalable cancer variant annotation and molecularly informed treatment recommendations. A wealth of mainly academia-driven knowledge bases calls for software tools supporting the multi-step diagnostic process. We derive a comprehensive list of knowledge bases relevant for variant interpretation by a review of existing literature followed by a survey among medical experts from university hospitals in Germany. In addition, we review cancer variant interpretation tools, which integrate multiple knowledge bases. We categorize the knowledge bases along the diagnostic process in precision oncology and analyze programmatic access options as well as the integration of knowledge bases into software tools. The most commonly used knowledge bases provide good programmatic access options and have been integrated into a range of software tools. For the wider set of knowledge bases, access options vary across different parts of the diagnostic process. Programmatic access is limited for information regarding clinical classifications of variants and for therapy recommendations. The main issue for databases used for biological classification of pathogenic variants and pathway context information is the lack of standardized interfaces. There is no single cancer variant interpretation tool that integrates all identified knowledge bases.
Specialized tools are available and need to be further developed for different steps in the diagnostic process.}, language = {en} } @book{BreestBoucheGrundetal.2006, author = {Breest, Martin and Bouch{\'e}, Paul and Grund, Martin and Haubrock, S{\"o}ren and H{\"u}ttenrauch, Stefan and Kylau, Uwe and Ploskonos, Anna and Queck, Tobias and Schreiter, Torben}, title = {Fundamentals of Service-Oriented Engineering}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-939469-35-3}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-33801}, publisher = {Universit{\"a}t Potsdam}, pages = {Getr. Z{\"a}hlung}, year = {2006}, abstract = {Since 2002, keywords like service-oriented engineering, service-oriented computing, and service-oriented architecture have been widely used in research, education, and enterprises. These and related terms are often misunderstood or used incorrectly. To correct these misunderstandings, a deeper knowledge of the concepts, the historical backgrounds, and an overview of service-oriented architectures is demanded and given in this paper.}, language = {en} } @inproceedings{BynensVanLanduytTruyenetal.2010, author = {Bynens, Maarten and Van Landuyt, Dimitri and Truyen, Eddy and Joosen, Wouter}, title = {Towards reusable aspects: the callback mismatch problem}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-41347}, year = {2010}, abstract = {Because software development is increasingly expensive and time-consuming, software reuse gains importance. Aspect-oriented software development modularizes crosscutting concerns, which enables their systematic reuse. Literature provides a number of AOP patterns and best practices for developing reusable aspects based on compelling examples for concerns like tracing, transactions and persistence. However, such best practices are lacking for systematically reusing invasive aspects. In this paper, we present the 'callback mismatch problem'. This problem arises in the context of abstraction mismatch, in which the aspect is required to issue a callback to the base application. As a consequence, the composition of invasive aspects is cumbersome to implement, difficult to maintain and impossible to reuse. We motivate this problem in a real-world example, show that it persists in the current state-of-the-art, and outline the need for advanced aspectual composition mechanisms to deal with this.}, language = {en} } @phdthesis{Boehm2013, author = {B{\"o}hm, Christoph}, title = {Enriching the Web of Data with topics and links}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-68624}, school = {Universit{\"a}t Potsdam}, year = {2013}, abstract = {This thesis presents novel ideas and research findings for the Web of Data - a global data space spanning many so-called Linked Open Data sources. Linked Open Data adheres to a set of simple principles to allow easy access and reuse for data published on the Web. Linked Open Data is by now an established concept and many (mostly academic) publishers have adopted the principles, building a powerful web of structured knowledge available to everybody. However, so far, Linked Open Data does not yet play a significant role among common web technologies that currently facilitate a high-standard Web experience. In this work, we thoroughly discuss the state-of-the-art for Linked Open Data and highlight several shortcomings - some of them we tackle in the main part of this work. First, we propose a novel type of data source meta-information, namely the topics of a dataset.
This information could be published with dataset descriptions and support a variety of use cases, such as data source exploration and selection. For the topic retrieval, we present an approach coined Annotated Pattern Percolation (APP), which we evaluate with respect to topics extracted from Wikipedia portals. Second, we contribute to entity linking research by presenting an optimization model for joint entity linking, showing its hardness, and proposing three heuristics implemented in the LINked Data Alignment (LINDA) system. Our first solution can exploit multi-core machines, whereas the second and third approach are designed to run in a distributed shared-nothing environment. We discuss and evaluate the properties of our approaches, leading to recommendations on which algorithm to use in a specific scenario. The distributed algorithms are among the first of their kind, i.e., approaches for joint entity linking in a distributed fashion. Also, we illustrate that we can tackle the entity linking problem on the very large scale with data comprising more than 100 million entity representations from very many sources. Finally, we approach a sub-problem of entity linking, namely the alignment of concepts. We again target a method that looks at the data in its entirety and does not neglect existing relations. Also, this concept alignment method shall execute very fast to serve as a preprocessing for further computations. Our approach, called Holistic Concept Matching (HCM), achieves the required speed through grouping the input by comparing so-called knowledge representations. Within the groups, we perform complex similarity computations, relation conclusions, and detect semantic contradictions. The quality of our result is again evaluated on a large and heterogeneous dataset from the real Web. In summary, this work contributes a set of techniques for enhancing the current state of the Web of Data. All approaches have been tested on large and heterogeneous real-world input.}, language = {en} } @book{CalmezHesseSiegmundetal.2013, author = {Calmez, Conrad and Hesse, Hubert and Siegmund, Benjamin and Stamm, Sebastian and Thomschke, Astrid and Hirschfeld, Robert and Ingalls, Dan and Lincke, Jens}, title = {Explorative authoring of Active Web content in a mobile environment}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-232-2}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-64054}, publisher = {Universit{\"a}t Potsdam}, pages = {132}, year = {2013}, abstract = {Developing rich Web applications can be a complex job - especially when it comes to mobile device support. Web-based environments such as Lively Webwerkstatt can help developers implement such applications by making the development process more direct and interactive. Furthermore, the process of developing software is collaborative, which requires the development environment to offer collaboration facilities. This report describes extensions of the web-based development environment Lively Webwerkstatt such that it can be used in a mobile environment.
The extensions comprise collaboration mechanisms and user interface adaptations as well as event processing and performance measurement on mobile devices.}, language = {en} } @article{ChauhanFriedrichRothenberger2020, author = {Chauhan, Ankit and Friedrich, Tobias and Rothenberger, Ralf}, title = {Greed is good for deterministic scale-free networks}, series = {Algorithmica : an international journal in computer science}, volume = {82}, journal = {Algorithmica : an international journal in computer science}, number = {11}, publisher = {Springer}, address = {New York}, issn = {0178-4617}, doi = {10.1007/s00453-020-00729-z}, pages = {3338 -- 3389}, year = {2020}, abstract = {Large real-world networks typically follow a power-law degree distribution. To study such networks, numerous random graph models have been proposed. However, real-world networks are not drawn at random. Therefore, Brach et al. (27th symposium on discrete algorithms (SODA), pp 1306-1325, 2016) introduced two natural deterministic conditions: (1) a power-law upper bound on the degree distribution (PLB-U) and (2) power-law neighborhoods, that is, the degree distribution of neighbors of each vertex is also upper bounded by a power law (PLB-N). They showed that many real-world networks satisfy both properties and exploited them to design faster algorithms for a number of classical graph problems. We complement their work by showing that some well-studied random graph models exhibit both of the mentioned PLB properties. PLB-U and PLB-N hold with high probability for Chung-Lu Random Graphs and Geometric Inhomogeneous Random Graphs and almost surely for Hyperbolic Random Graphs. As a consequence, all results of Brach et al. also hold with high probability or almost surely for those random graph classes. In the second part, we study three classical NP-hard optimization problems on PLB networks. It is known that on general graphs with maximum degree Delta, a greedy algorithm, which chooses nodes in the order of their degree, only achieves an Omega(ln Delta)-approximation for Minimum Vertex Cover and Minimum Dominating Set, and an Omega(Delta)-approximation for Maximum Independent Set. We prove that the PLB-U property with beta>2 suffices for the greedy approach to achieve a constant-factor approximation for all three problems. We also show that these problems are APX-hard even if PLB-U, PLB-N, and an additional power-law lower bound on the degree distribution hold. Hence, a PTAS cannot be expected unless P = NP. Furthermore, we prove that all three problems are in MAX SNP if the PLB-U property holds.}, language = {en} } @article{ChujfiLaRocheMeinel2017, author = {Chujfi-La-Roche, Salim and Meinel, Christoph}, title = {Matching cognitively sympathetic individual styles to develop collective intelligence in digital communities}, series = {AI \& society : the journal of human-centred systems and machine intelligence}, volume = {35}, journal = {AI \& society : the journal of human-centred systems and machine intelligence}, number = {1}, publisher = {Springer}, address = {New York}, issn = {0951-5666}, doi = {10.1007/s00146-017-0780-x}, pages = {5 -- 15}, year = {2017}, abstract = {Creation, collection and retention of knowledge in digital communities is an activity that currently requires being explicitly targeted as a secure method of keeping intellectual capital growing in the digital era.
In particular, we consider it relevant to analyze and evaluate the empathetic cognitive personalities and behaviors that individuals now have with the change from face-to-face communication (F2F) to computer-mediated communication (CMC) online. This document proposes a cyber-humanistic approach to enhance the traditional SECI knowledge management model. A cognitive perception is added to its cyclical process following design thinking interaction, exemplary for improvement of the method in which knowledge is continuously created, converted and shared. In building a cognitive-centered model, we specifically focus on the effective identification and response to cognitive stimulation of individuals, as they are the intellectual generators and multiplicators of knowledge in the online environment. Our target is to identify how geographically distributed-digital-organizations should align the individual's cognitive abilities to promote iteration and improve interaction as a reliable stimulant of collective intelligence. The new model focuses on analyzing the four different stages of knowledge processing, where individuals with sympathetic cognitive personalities can significantly boost knowledge creation in a virtual social system. For organizations, this means that multidisciplinary individuals can maximize their extensive potential, by externalizing their knowledge in the correct stage of the knowledge creation process, and by collaborating with their appropriate sympathetically cognitive remote peers.}, language = {en} } @phdthesis{Dawoud2013, author = {Dawoud, Wesam}, title = {Scalability and performance management of internet applications in the cloud}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-68187}, school = {Universit{\"a}t Potsdam}, year = {2013}, abstract = {Cloud computing is a model for enabling on-demand access to a shared pool of computing resources. With virtually limitless on-demand resources, a cloud environment enables the hosted Internet application to quickly cope when there is an increase in the workload. However, the overhead of provisioning resources exposes the Internet application to periods of under-provisioning and performance degradation. Moreover, the performance interference, due to the consolidation in the cloud environment, complicates the performance management of the Internet applications. In this dissertation, we propose two approaches to mitigate the impact of the resources provisioning overhead. The first approach employs control theory to scale resources vertically and cope fast with workload. This approach assumes that the provider has knowledge and control over the platform running in the virtual machines (VMs), which limits it to Platform as a Service (PaaS) and Software as a Service (SaaS) providers. The second approach is a customer-side one that deals with the horizontal scalability in an Infrastructure as a Service (IaaS) model. It addresses the trade-off problem between cost and performance with a multi-goal optimization solution. This approach finds the scale thresholds that achieve the highest performance with the lowest increase in the cost. Moreover, the second approach employs a proposed time series forecasting algorithm to scale the application proactively and avoid under-utilization periods. Furthermore, to mitigate the interference impact on the Internet application performance, we developed a system which finds and eliminates the VMs suffering from performance interference. 
The developed system is a lightweight solution that does not require provider involvement. To evaluate our approaches and the designed algorithms at a large scale, we developed a simulator called ScaleSim. In the simulator, we implemented scalability components acting as the scalability components of Amazon EC2. The current scalability implementation in Amazon EC2 is used as a reference point for evaluating the improvement in the scalable application performance. ScaleSim is fed with realistic models of the RUBiS benchmark extracted from the real environment. The workload is generated from the access logs of the 1998 World Cup website. The results show that optimizing the scalability thresholds and adopting proactive scalability can mitigate 88\% of the resource provisioning overhead impact with only a 9\% increase in the cost.}, language = {en} } @article{DeFreitasJohnsonGoldenetal.2021, author = {De Freitas, Jessica K. and Johnson, Kipp W. and Golden, Eddye and Nadkarni, Girish N. and Dudley, Joel T. and B{\"o}ttinger, Erwin and Glicksberg, Benjamin S. and Miotto, Riccardo}, title = {Phe2vec}, series = {Patterns}, volume = {2}, journal = {Patterns}, number = {9}, publisher = {Elsevier}, address = {Amsterdam}, issn = {2666-3899}, doi = {10.1016/j.patter.2021.100337}, pages = {9}, year = {2021}, abstract = {Robust phenotyping of patients from electronic health records (EHRs) at scale is a challenge in clinical informatics. Here, we introduce Phe2vec, an automated framework for disease phenotyping from EHRs based on unsupervised learning and assess its effectiveness against standard rule-based algorithms from Phenotype KnowledgeBase (PheKB). Phe2vec is based on pre-computing embeddings of medical concepts and patients' clinical history. Disease phenotypes are then derived from a seed concept and its neighbors in the embedding space. Patients are linked to a disease if their embedded representation is close to the disease phenotype. Comparing Phe2vec and PheKB cohorts head-to-head using chart review, Phe2vec performed on par or better in nine out of ten diseases. Differently from other approaches, it can scale to any condition and was validated against widely adopted expert-based standards. Phe2vec aims to optimize clinical informatics research by augmenting current frameworks to characterize patients by condition and derive reliable disease cohorts.}, language = {en} } @article{DischerRichterDoellner2016, author = {Discher, S{\"o}ren and Richter, Rico and D{\"o}llner, J{\"u}rgen Roland Friedrich}, title = {Interactive and View-Dependent See-Through Lenses for Massive 3D Point Clouds}, series = {Advances in 3D Geoinformation}, journal = {Advances in 3D Geoinformation}, publisher = {Springer}, address = {Cham}, isbn = {978-3-319-25691-7}, issn = {1863-2246}, doi = {10.1007/978-3-319-25691-7_3}, pages = {49 -- 62}, year = {2016}, abstract = {3D point clouds are a digital representation of our world and are used in a variety of applications. They are captured with LiDAR or derived by image-matching approaches to get surface information of objects, e.g., indoor scenes, buildings, infrastructures, cities, and landscapes. We present novel interaction and visualization techniques for heterogeneous, time variant, and semantically rich 3D point clouds. Interactive and view-dependent see-through lenses are introduced as exploration tools to enhance recognition of objects, semantics, and temporal changes within 3D point cloud depictions.
We also develop filtering and highlighting techniques that are used to dissolve occlusion to give context-specific insights. All techniques can be combined with an out-of-core real-time rendering system for massive 3D point clouds. We have evaluated the presented approach with 3D point clouds from different application domains. The results show the usability of the techniques and how different visualization and exploration tasks can be improved for a variety of domain-specific applications.}, language = {en} } @article{DoerrKrejca2021, author = {Doerr, Benjamin and Krejca, Martin Stefan}, title = {A simplified run time analysis of the univariate marginal distribution algorithm on LeadingOnes}, series = {Theoretical computer science}, volume = {851}, journal = {Theoretical computer science}, publisher = {Elsevier}, address = {Amsterdam}, issn = {0304-3975}, doi = {10.1016/j.tcs.2020.11.028}, pages = {121 -- 128}, year = {2021}, abstract = {With elementary means, we prove a stronger run time guarantee for the univariate marginal distribution algorithm (UMDA) optimizing the LEADINGONES benchmark function in the desirable regime with low genetic drift. If the population size is at least quasilinear, then, with high probability, the UMDA samples the optimum in a number of iterations that is linear in the problem size divided by the logarithm of the UMDA's selection rate. This improves over the previous guarantee, obtained by Dang and Lehre (2015) via the deep level-based population method, both in terms of the run time and by demonstrating further run time gains from small selection rates. Under similar assumptions, we prove a lower bound that matches our upper bound up to constant factors.}, language = {en} } @article{DoerrNeumannSutton2016, author = {Doerr, Benjamin and Neumann, Frank and Sutton, Andrew M.}, title = {Time Complexity Analysis of Evolutionary Algorithms on Random Satisfiable k-CNF Formulas}, series = {Algorithmica : an international journal in computer science}, volume = {78}, journal = {Algorithmica : an international journal in computer science}, publisher = {Springer}, address = {New York}, issn = {0178-4617}, doi = {10.1007/s00453-016-0190-3}, pages = {561 -- 586}, year = {2016}, abstract = {We contribute to the theoretical understanding of randomized search heuristics by investigating their optimization behavior on satisfiable random k-satisfiability instances both in the planted solution model and the uniform model conditional on satisfiability. Denoting the number of variables by n, our main technical result is that the simple (1+1) evolutionary algorithm with high probability finds a satisfying assignment in time O(n log n) when the clause-variable density is at least logarithmic. For low density instances, evolutionary algorithms seem to be less effective, and all we can show is a subexponential upper bound on the runtime for densities below . We complement these mathematical results with numerical experiments on a broader density spectrum. They indicate that, indeed, the (1+1) EA is less efficient on lower densities. Our experiments also suggest that the implicit constants hidden in our main runtime guarantee are low. Our main result extends and considerably improves the result obtained by Sutton and Neumann (Lect Notes Comput Sci 8672:942-951, 2014) in terms of runtime, minimum density, and clause length. These improvements are made possible by establishing a close fitness-distance correlation in certain parts of the search space.
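As an illustrative aside to the UMDA entry above, the following minimal Python sketch shows a textbook UMDA optimizing LeadingOnes: sample a population from a product distribution, select the best individuals, and update the per-position frequencies. The population size, selection size, and margin bounds are illustrative choices, not the parameters assumed in the cited analysis.

import random

def leading_ones(x):
    # Number of consecutive ones at the start of the bit string.
    count = 0
    for bit in x:
        if bit == 1:
            count += 1
        else:
            break
    return count

def umda(n, lam=100, mu=50, max_iters=10_000):
    # Frequency vector, initialised to 1/2 per position.
    p = [0.5] * n
    lower, upper = 1.0 / n, 1.0 - 1.0 / n  # margins against premature fixation
    population = []
    for _ in range(max_iters):
        population = [[1 if random.random() < p[i] else 0 for i in range(n)]
                      for _ in range(lam)]
        population.sort(key=leading_ones, reverse=True)
        if leading_ones(population[0]) == n:
            return population[0]
        selected = population[:mu]
        for i in range(n):
            freq = sum(ind[i] for ind in selected) / mu
            p[i] = min(max(freq, lower), upper)
    return max(population, key=leading_ones)

if __name__ == "__main__":
    best = umda(n=50)
    print(leading_ones(best))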
This approach might be of independent interest and could be useful for other average-case analyses of randomized search heuristics. While the notion of a fitness-distance correlation has been around for a long time, to the best of our knowledge, this is the first time that fitness-distance correlation is explicitly used to rigorously prove a performance statement for an evolutionary algorithm.}, language = {en} } @book{DraisbachNaumannSzottetal.2012, author = {Draisbach, Uwe and Naumann, Felix and Szott, Sascha and Wonneberg, Oliver}, title = {Adaptive windows for duplicate detection}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-143-1}, issn = {1613-5652}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-53007}, publisher = {Universit{\"a}t Potsdam}, pages = {41}, year = {2012}, abstract = {Duplicate detection is the task of identifying all groups of records within a data set that represent the same real-world entity. This task is difficult, because (i) representations might differ slightly, so some similarity measure must be defined to compare pairs of records, and (ii) data sets might have a high volume, making a pair-wise comparison of all records infeasible. To tackle the second problem, many algorithms have been suggested that partition the data set and compare all record pairs only within each partition. One well-known such approach is the Sorted Neighborhood Method (SNM), which sorts the data according to some key and then advances a window over the data comparing only records that appear within the same window. We propose several variations of SNM that have in common a varying window size and advancement. The general intuition of such adaptive windows is that there might be regions of high similarity suggesting a larger window size and regions of lower similarity suggesting a smaller window size. We propose and thoroughly evaluate several adaptation strategies, some of which are provably better than the original SNM in terms of efficiency (same results with fewer comparisons).}, language = {en} } @article{DreselerBoissierRabletal.2020, author = {Dreseler, Markus and Boissier, Martin and Rabl, Tilmann and Uflacker, Matthias}, title = {Quantifying TPC-H choke points and their optimizations}, series = {Proceedings of the VLDB Endowment}, volume = {13}, journal = {Proceedings of the VLDB Endowment}, number = {8}, publisher = {Association for Computing Machinery}, address = {New York}, issn = {2150-8097}, doi = {10.14778/3389133.3389138}, pages = {1206 -- 1220}, year = {2020}, abstract = {TPC-H continues to be the most widely used benchmark for relational OLAP systems. It poses a number of challenges, also known as "choke points", which database systems have to solve in order to achieve good benchmark results. Examples include joins across multiple tables, correlated subqueries, and correlations within the TPC-H data set. Knowing the impact of such optimizations helps in developing optimizers as well as in interpreting TPC-H results across database systems. This paper provides a systematic analysis of choke points and their optimizations. It complements previous work on TPC-H choke points by providing a quantitative discussion of their relevance. It focuses on eleven choke points where the optimizations are beneficial independently of the database system. Of these, the flattening of subqueries and the placement of predicates have the biggest impact.
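As an illustrative aside to the adaptive-windows entry above, the following Python sketch shows the Sorted Neighborhood idea with a naively adaptive window: records are sorted by a key, only records inside a sliding window are compared, and the window grows while neighbouring records still look similar. The key function, similarity measure, threshold, and growth rule are simplifications invented for illustration, not the adaptation strategies evaluated in the report.

from difflib import SequenceMatcher

def similarity(a, b):
    # Toy string similarity; real systems use domain-specific measures.
    return SequenceMatcher(None, a, b).ratio()

def adaptive_snm(records, key=lambda r: r, base_window=3, threshold=0.8):
    # Sorted Neighborhood sketch: sort by key, compare within a window,
    # and enlarge the window in regions of high similarity.
    ordered = sorted(records, key=key)
    duplicates = []
    for i, rec in enumerate(ordered):
        window = base_window
        j = i + 1
        while j < len(ordered) and j < i + window:
            if similarity(key(rec), key(ordered[j])) >= threshold:
                duplicates.append((rec, ordered[j]))
                window += 1  # region of high similarity: enlarge the window
            j += 1
    return duplicates

names = ["Jon Smith", "John Smith", "John Smyth", "Mary Jones", "Marie Jones"]
print(adaptive_snm(names))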
Three queries (Q2, Q17, and Q21) are strongly influenced by the choice of an efficient query plan; three others (Q1, Q13, and Q18) are less influenced by plan optimizations and more dependent on an efficient execution engine.}, language = {en} } @book{DyckGiese2017, author = {Dyck, Johannes and Giese, Holger}, title = {k-Inductive invariant checking for graph transformation systems}, number = {119}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-406-7}, issn = {1613-5652}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-397044}, publisher = {Universit{\"a}t Potsdam}, pages = {45}, year = {2017}, abstract = {While offering significant expressive power, graph transformation systems often come with rather limited capabilities for automated analysis, particularly if systems with many possible initial graphs and large or infinite state spaces are concerned. One approach that tries to overcome these limitations is inductive invariant checking. However, the verification of inductive invariants often requires extensive knowledge about the system in question and faces the approach-inherent challenges of locality and lack of context. To address that, this report discusses k-inductive invariant checking for graph transformation systems as a generalization of inductive invariants. The additional context acquired by taking multiple (k) steps into account is the key difference from inductive invariant checking and is often enough to establish the desired invariants without requiring the iterative development of additional properties. To analyze possibly infinite systems in a finite fashion, we introduce a symbolic encoding for transformation traces using a restricted form of nested application conditions. As its central contribution, this report then presents a formal approach and algorithm to verify graph constraints as k-inductive invariants. We prove the approach's correctness and demonstrate its applicability by means of several examples evaluated with a prototypical implementation of our algorithm.}, language = {en} } @book{DyckGiese2015, author = {Dyck, Johannes and Giese, Holger}, title = {Inductive invariant checking with partial negative application conditions}, number = {98}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-333-6}, issn = {1613-5652}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-77748}, publisher = {Universit{\"a}t Potsdam}, pages = {43}, year = {2015}, abstract = {Graph transformation systems are a powerful formal model to capture model transformations or systems with infinite state space, among others. However, this expressive power comes at the cost of rather limited automated analysis capabilities. The general case of unboundedly many initial graphs or infinite state spaces is only supported by approaches with rather limited scalability or expressiveness. In this report we improve an existing approach for the automated verification of inductive invariants for graph transformation systems. By employing partial negative application conditions to represent and check many alternative conditions in a more compact manner, we can check examples with rules and constraints of substantially higher complexity. We also substantially extend the expressive power by supporting more complex negative application conditions and provide higher accuracy by employing advanced implication checks.
The improvements are evaluated and compared with another applicable tool by considering three case studies.}, language = {en} } @book{DyckGieseLambers2017, author = {Dyck, Johannes and Giese, Holger and Lambers, Leen}, title = {Automatic verification of behavior preservation at the transformation level for relational model transformation}, number = {112}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-391-6}, issn = {1613-5652}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-100279}, publisher = {Universit{\"a}t Potsdam}, pages = {viii, 112}, year = {2017}, abstract = {The correctness of model transformations is a crucial element for model-driven engineering of high quality software. In particular, behavior preservation is the most important correctness property avoiding the introduction of semantic errors during the model-driven engineering process. Behavior preservation verification techniques either show that specific properties are preserved, or more generally and complex, they show some kind of behavioral equivalence or refinement between source and target model of the transformation. Both kinds of behavior preservation verification goals have been presented with automatic tool support for the instance level, i.e. for a given source and target model specified by the model transformation. However, up until now there is no automatic verification approach available at the transformation level, i.e. for all source and target models specified by the model transformation. In this report, we extend our results presented in [27] and outline a new sophisticated approach for the automatic verification of behavior preservation captured by bisimulation resp. simulation for model transformations specified by triple graph grammars and semantic definitions given by graph transformation rules. In particular, we show that the behavior preservation problem can be reduced to invariant checking for graph transformation and that the resulting checking problem can be addressed by our own invariant checker even for a complex example where a sequence chart is transformed into communicating automata. We further discuss today's limitations of invariant checking for graph transformation and motivate further lines of future work in this direction.}, language = {en} } @book{DoellnerKirschNienhaus2005, author = {D{\"o}llner, J{\"u}rgen Roland Friedrich and Kirsch, Florian and Nienhaus, Marc}, title = {Visualizing Design and Spatial Assembly of Interactive CSG}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-937786-56-2}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-33771}, publisher = {Universit{\"a}t Potsdam}, pages = {8}, year = {2005}, abstract = {For interactive construction of CSG models understanding the layout of a model is essential for its efficient manipulation. To understand position and orientation of aggregated components of a CSG model, we need to realize its visible and occluded parts as a whole. Hence, transparency and enhanced outlines are key techniques to assist comprehension. We present a novel real-time rendering technique for visualizing design and spatial assembly of CSG models. As enabling technology we combine an image-space CSG rendering algorithm with blueprint rendering. Blueprint rendering applies depth peeling for extracting layers of ordered depth from polygonal models and then composes them in sorted order facilitating a clear insight of the models. 
We develop a solution for implementing depth peeling for CSG models considering their depth complexity. Capturing surface colors of each layer and later combining the results allows for generating order-independent transparency as one major rendering technique for CSG models. We further define visually important edges for CSG models and integrate an image-space edge-enhancement technique for detecting them in each layer. In this way, we extract visually important edges that are directly and indirectly visible to outline a model's layout. Combining edges with transparency rendering, finally, generates edge-enhanced depictions of image-based CSG models and allows us to realize their complex, spatial assembly.}, language = {en} } @phdthesis{EidSabbagh2015, author = {Eid-Sabbagh, Rami-Habib}, title = {Business process architectures}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-79719}, school = {Universit{\"a}t Potsdam}, pages = {xvii, 256}, year = {2015}, abstract = {Business Process Management has become an integral part of modern organizations in the private and public sector for improving their operations. In the course of Business Process Management efforts, companies and organizations assemble large process model repositories with many hundreds and thousands of business process models bearing a large amount of information. With the advent of large business process model collections, new challenges arise, such as structuring and managing a large number of process models, their maintenance, and their quality assurance. This is covered by business process architectures that have been introduced for organizing and structuring business process model collections. A variety of business process architecture approaches have been proposed that align business processes along aspects of interest, e.g., goals, functions, or objects. They provide a high-level categorization of single processes ignoring their interdependencies, thus hiding valuable information. The production of goods or the delivery of services are often realized by a complex system of interdependent business processes. Hence, taking a holistic view of business process interdependencies becomes a major necessity to organize, analyze, and assess the impact of their re-/design. Visualizing business process interdependencies reveals hidden and implicit information from a process model collection. In this thesis, we present a novel Business Process Architecture approach for representing and analyzing business process interdependencies on an abstract level. We propose a formal definition of our Business Process Architecture approach, design correctness criteria, and develop analysis techniques for assessing their quality. We describe a methodology for applying our Business Process Architecture approach top-down and bottom-up. This includes techniques for Business Process Architecture extraction from, and decomposition to, process models while considering consistency issues between business process architecture and process model level. Using our extraction algorithm, we present a novel technique to identify and visualize data interdependencies in Business Process Data Architectures. Our Business Process Architecture approach provides business process experts, managers, and other users of a process model collection with an overview that allows reasoning about a large set of process models and understanding and analyzing their interdependencies in a facilitated way.
In this regard, we evaluated our Business Process Architecture approach in an experiment and provide implementations of selected techniques.}, language = {en} } @book{EidSabbaghHeweltWeske2013, author = {Eid-Sabbagh, Rami-Habib and Hewelt, Marcin and Weske, Mathias}, title = {Business process architectures with multiplicities : transformation and correctness}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-257-5}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-66780}, publisher = {Universit{\"a}t Potsdam}, pages = {18}, year = {2013}, abstract = {Business processes are instrumental in managing work in organisations. To study the interdependencies between business processes, Business Process Architectures have been introduced. These express trigger and message flow relations between business processes. When we investigate real-world Business Process Architectures, we find complex interdependencies, involving multiple process instances. These aspects have not been studied in detail so far, especially concerning correctness properties. In this paper, we propose a modular transformation of BPAs to open nets for the analysis of behavior involving multiple business processes with multiplicities. For this purpose, we introduce intermediary nets to portray the semantics of multiplicity specifications. We evaluate our approach on a use case from the public sector.}, language = {en} } @inproceedings{FanMasuharaAotanietal.2010, author = {Fan, Yang and Masuhara, Hidehiko and Aotani, Tomoyuki and Nielson, Flemming and Nielson, Hanne Riis}, title = {AspectKE*: Security aspects with program analysis for distributed systems}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-41369}, year = {2010}, abstract = {Enforcing security policies in distributed systems is difficult, in particular, when a system contains untrusted components. We designed AspectKE*, a distributed AOP language based on a tuple space, to tackle this issue. In AspectKE*, aspects can enforce access control policies that depend on future behavior of running processes. One of the key language features is the predicates and functions that extract results of static program analysis, which are useful for defining security aspects that have to know about future behavior of a program. AspectKE* also provides a novel variable binding mechanism for pointcuts, so that pointcuts can uniformly specify join points based on both static and dynamic information about the program. Our implementation strategy performs fundamental static analysis at load-time, so as to keep runtime overheads minimal. We implemented a compiler for AspectKE*, and demonstrate the usefulness of AspectKE* through a security aspect for a distributed chat system.}, language = {en} } @book{FelgentreffBorningHirschfeld2013, author = {Felgentreff, Tim and Borning, Alan and Hirschfeld, Robert}, title = {Babelsberg : specifying and solving constraints on object behavior}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-265-0}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-67296}, publisher = {Universit{\"a}t Potsdam}, pages = {53}, year = {2013}, abstract = {Constraints allow developers to specify desired properties of systems in a number of domains, and have those properties be maintained automatically. This results in compact, declarative code, avoiding scattered code to check and imperatively re-satisfy invariants.
Despite these advantages, constraint programming is not yet widespread, with standard imperative programming still the norm. There is a long history of research on integrating constraint programming with the imperative paradigm. However, this integration typically does not unify the constructs for encapsulation and abstraction from both paradigms. This impedes re-use of modules, as client code written in one paradigm can only use modules written to support that paradigm. Modules require redundant definitions if they are to be used in both paradigms. We present a language - Babelsberg - that unifies the constructs for encapsulation and abstraction by using only object-oriented method definitions for both declarative and imperative code. Our prototype - Babelsberg/R - is an extension to Ruby, and continues to support Ruby's object-oriented semantics. It allows programmers to add constraints to existing Ruby programs in incremental steps by placing them on the results of normal object-oriented message sends. It is implemented by modifying a state-of-the-art Ruby virtual machine. The performance of standard object-oriented code without constraints is only modestly impacted, with typically less than 10\% overhead compared with the unmodified virtual machine. Furthermore, our architecture for adding multiple constraint solvers allows Babelsberg to deal with constraints in a variety of domains. We argue that our approach provides a useful step toward making constraint solving a generic tool for object-oriented programmers. We also provide example applications, written in our Ruby-based implementation, which use constraints in a variety of application domains, including interactive graphics, circuit simulations, data streaming with both hard and soft constraints on performance, and configuration file management.}, language = {en} } @book{FelgentreffHirschfeldMillsteinetal.2015, author = {Felgentreff, Tim and Hirschfeld, Robert and Millstein, Todd and Borning, Alan}, title = {Babelsberg/RML}, number = {103}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-348-0}, issn = {1613-5652}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-83826}, publisher = {Universit{\"a}t Potsdam}, pages = {68}, year = {2015}, abstract = {New programming language designs are often evaluated on concrete implementations. However, in order to draw conclusions about the language design from the evaluation of concrete programming languages, these implementations need to be verified against the formalism of the design. To that end, we also have to ensure that the design actually meets its stated goals. A useful tool for the latter has been to create an executable semantics from a formalism that can execute a test suite of examples. However, this mechanism so far did not allow verifying an implementation against the design. Babelsberg is a new design for a family of object-constraint languages. Recently, we have developed a formal semantics to clarify some issues in the design of those languages. Supplementing this work, we report here on how this formalism is turned into an executable operational semantics using the RML system. Furthermore, we show how we extended the executable semantics to create a framework that can generate test suites for the concrete Babelsberg implementations that provide traceability from the design to the language.
Finally, we discuss how these test suites helped us find and correct mistakes in the Babelsberg implementation for JavaScript.}, language = {en} } @book{GellerHirschfeldBracha2010, author = {Geller, Felix and Hirschfeld, Robert and Bracha, Gilad}, title = {Pattern Matching for an object-oriented and dynamically typed programming language}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-065-6}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-43035}, publisher = {Universit{\"a}t Potsdam}, pages = {81}, year = {2010}, abstract = {Pattern matching is a well-established concept in the functional programming community. It provides the means for concisely identifying and destructuring values of interest. This enables a clean separation of data structures and respective functionality, as well as dispatching functionality based on more than a single value. Unfortunately, expressive pattern matching facilities are seldomly incorporated in present object-oriented programming languages. We present a seamless integration of pattern matching facilities in an object-oriented and dynamically typed programming language: Newspeak. We describe language extensions to improve the practicability and integrate our additions with the existing programming environment for Newspeak. This report is based on the first author's master's thesis.}, language = {en} } @article{GhahremaniGiese2020, author = {Ghahremani, Sona and Giese, Holger}, title = {Evaluation of self-healing systems}, series = {Computers}, volume = {9}, journal = {Computers}, number = {1}, publisher = {MDPI}, address = {Basel}, issn = {2073-431X}, doi = {10.3390/computers9010016}, pages = {32}, year = {2020}, abstract = {Evaluating the performance of self-adaptive systems is challenging due to their interactions with often highly dynamic environments. In the specific case of self-healing systems, the performance evaluations of self-healing approaches and their parameter tuning rely on the considered characteristics of failure occurrences and the resulting interactions with the self-healing actions. In this paper, we first study the state-of-the-art for evaluating the performances of self-healing systems by means of a systematic literature review. We provide a classification of different input types for such systems and analyse the limitations of each input type. A main finding is that the employed inputs are often not sophisticated regarding the considered characteristics for failure occurrences. To further study the impact of the identified limitations, we present experiments demonstrating that wrong assumptions regarding the characteristics of the failure occurrences can result in large performance prediction errors, disadvantageous design-time decisions concerning the selection of alternative self-healing approaches, and disadvantageous deployment-time decisions concerning parameter tuning. 
Furthermore, the experiments indicate that employing multiple alternative input characteristics can help with reducing the risk of premature disadvantageous design-time decisions.}, language = {en} } @book{GieseBecker2013, author = {Giese, Holger and Becker, Basil}, title = {Modeling and verifying dynamic evolving service-oriented architectures}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-246-9}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-65112}, publisher = {Universit{\"a}t Potsdam}, pages = {97}, year = {2013}, abstract = {The service-oriented architecture supports the dynamic assembly and runtime reconfiguration of complex open IT landscapes by means of runtime binding of service contracts, launching of new components and termination of outdated ones. Furthermore, the evolution of these IT landscapes is not restricted to exchanging components with other ones using the same service contracts, as new service contracts can be added as well. However, current approaches for modeling and verification of service-oriented architectures do not support these important capabilities to their full extent. In this report, we present an extension of the current OMG proposal for service modeling with UML - SoaML - which overcomes these limitations. It permits modeling services and their service contracts at different levels of abstraction, provides a formal semantics for all modeling concepts, and enables verifying critical properties. Our compositional and incremental verification approach allows for complex properties including communication parameters and time and covers, besides the dynamic binding of service contracts and the replacement of components, also the evolution of the systems by means of new service contracts. The modeling as well as verification capabilities of the presented approach are demonstrated by means of a supply chain example, and the verification results of a first prototype are shown.}, language = {en} } @book{GieseHildebrandt2009, author = {Giese, Holger and Hildebrandt, Stephan}, title = {Efficient model synchronization of large-scale models}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-940793-84-3}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-29281}, publisher = {Universit{\"a}t Potsdam}, pages = {27}, year = {2009}, abstract = {Model-driven software development requires techniques to consistently propagate modifications between different related models to realize its full potential. For large-scale models, efficiency is essential in this respect. In this paper, we present an improved model synchronization algorithm based on triple graph grammars that is highly efficient and, therefore, can also synchronize large-scale models sufficiently fast.
We can show, that the overall algorithm has optimal complexity if it is dominating the rule matching and further present extensive measurements that show the efficiency of the presented model transformation and synchronization technique.}, language = {en} } @book{GieseHildebrandtLambers2010, author = {Giese, Holger and Hildebrandt, Stephan and Lambers, Leen}, title = {Toward bridging the gap between formal semantics and implementation of triple graph grammars}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-078-6}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-45219}, publisher = {Universit{\"a}t Potsdam}, pages = {26}, year = {2010}, abstract = {The correctness of model transformations is a crucial element for the model-driven engineering of high quality software. A prerequisite to verify model transformations at the level of the model transformation specification is that an unambiguous formal semantics exists and that the employed implementation of the model transformation language adheres to this semantics. However, for existing relational model transformation approaches it is usually not really clear under which constraints particular implementations are really conform to the formal semantics. In this paper, we will bridge this gap for the formal semantics of triple graph grammars (TGG) and an existing efficient implementation. Whereas the formal semantics assumes backtracking and ignores non-determinism, practical implementations do not support backtracking, require rule sets that ensure determinism, and include further optimizations. Therefore, we capture how the considered TGG implementation realizes the transformation by means of operational rules, define required criteria and show conformance to the formal semantics if these criteria are fulfilled. We further outline how static analysis can be employed to guarantee these criteria.}, language = {en} } @book{GieseHildebrandtNeumannetal.2012, author = {Giese, Holger and Hildebrandt, Stephan and Neumann, Stefan and W{\"a}tzoldt, Sebastian}, title = {Industrial case study on the integration of SysML and AUTOSAR with triple graph grammars}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-191-2}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-60184}, publisher = {Universit{\"a}t Potsdam}, pages = {vi, 51}, year = {2012}, abstract = {During the overall development of complex engineering systems different modeling notations are employed. For example, in the domain of automotive systems system engineering models are employed quite early to capture the requirements and basic structuring of the entire system, while software engineering models are used later on to describe the concrete software architecture. Each model helps in addressing the specific design issue with appropriate notations and at a suitable level of abstraction. However, when we step forward from system design to the software design, the engineers have to ensure that all decisions captured in the system design model are correctly transferred to the software engineering model. Even worse, when changes occur later on in either model, today the consistency has to be reestablished in a cumbersome manual step. In this report, we present in an extended version of [Holger Giese, Stefan Neumann, and Stephan Hildebrandt. Model Synchronization at Work: Keeping SysML and AUTOSAR Models Consistent. In Gregor Engels, Claus Lewerentz, Wilhelm Sch{\"a}fer, Andy Sch{\"u}rr, and B. 
Westfechtel, editors, Graph Transformations and Model-Driven Engineering - Essays Dedicated to Manfred Nagl on the Occasion of his 65th Birthday, volume 5765 of Lecture Notes in Computer Science, pages 555-579. Springer Berlin / Heidelberg, 2010.] how model synchronization and consistency rules can be applied to automate this task and ensure that the different models are kept consistent. We also introduce a general approach for model synchronization. Besides synchronization, the approach consists of tool adapters as well as consistency rules covering the overlap between the synchronized parts of a model and the rest. We present the model synchronization algorithm based on triple graph grammars in detail and further exemplify the general approach by means of a model synchronization solution between system engineering models in SysML and software engineering models in AUTOSAR, which has been developed for an industrial partner. In the appendix, as an extension to [19], the meta-models and all TGG rules for the SysML-to-AUTOSAR model synchronization are documented.}, language = {en} } @phdthesis{Glander2012, author = {Glander, Tassilo}, title = {Multi-scale representations of virtual 3D city models}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-64117}, school = {Universit{\"a}t Potsdam}, year = {2012}, abstract = {Virtual 3D city and landscape models are the main subject investigated in this thesis. They digitally represent urban space and have many applications in different domains, e.g., simulation, cadastral management, and city planning. Visualization is an elementary component of these applications. Photo-realistic visualization with an increasingly high degree of detail leads to fundamental problems for comprehensible visualization. A large number of highly detailed and textured objects within a virtual 3D city model may create visual noise and overload the users with information. Objects are subject to perspective foreshortening and may be occluded or not displayed in a meaningful way, as they are too small. In this thesis we present abstraction techniques that automatically process virtual 3D city and landscape models to derive abstracted representations. These have a reduced degree of detail, while essential characteristics are preserved. After introducing definitions for model, scale, and multi-scale representations, we discuss the fundamentals of map generalization as well as techniques for 3D generalization. The first presented technique is a cell-based generalization of virtual 3D city models. It creates abstract representations that have a highly reduced level of detail while maintaining essential structures, e.g., the infrastructure network, landmark buildings, and free spaces. The technique automatically partitions the input virtual 3D city model into cells based on the infrastructure network. The single building models contained in each cell are aggregated to abstracted cell blocks. Using weighted infrastructure elements, cell blocks can be computed on different hierarchical levels, storing the hierarchy relation between the cell blocks. Furthermore, we identify initial landmark buildings within a cell by comparing the properties of individual buildings with the aggregated properties of the cell. For each block, the identified landmark building models are subtracted using Boolean operations and integrated in a photo-realistic way.
Finally, for the interactive 3D visualization, we discuss the creation of the virtual 3D geometry and its appearance styling through colors, labeling, and transparency. We demonstrate the technique with example data sets. Additionally, we discuss applications of generalization lenses and transitions between abstract representations. The second technique is a real-time rendering technique for geometric enhancement of landmark objects within a virtual 3D city model. Depending on the virtual camera distance, landmark objects are scaled to ensure their visibility within a specific distance interval while deforming their environment. First, in a preprocessing step, a landmark hierarchy is computed; this is then used to derive distance intervals for the interactive rendering. At runtime, using the virtual camera distance, a scaling factor is computed and applied to each landmark. The scaling factor is interpolated smoothly at the interval boundaries using cubic B{\'e}zier splines. Non-landmark geometry that is near landmark objects is deformed with respect to a limited number of landmarks. We demonstrate the technique by applying it to a highly detailed virtual 3D city model and a generalized 3D city model. In addition, we discuss an adaptation of the technique for non-linear projections and mobile devices. The third technique is a real-time rendering technique to create abstract 3D isocontour visualization of virtual 3D terrain models. The virtual 3D terrain model is visualized as a layered or stepped relief. The technique works without preprocessing and, as it is implemented using programmable graphics hardware, can be integrated with minimal changes into common terrain rendering techniques. Consequently, the computation is done in the rendering pipeline for each vertex, primitive, i.e., triangle, and fragment. For each vertex, the height is quantized to the nearest isovalue. For each triangle, the vertex configuration with respect to their isovalues is determined first. Using the configuration, the triangle is then subdivided. The subdivision forms a partial step geometry aligned with the triangle. For each fragment, the surface appearance is determined, e.g., depending on the surface texture, shading, and height-color-mapping. Flexible usage of the technique is demonstrated with applications from focus+context visualization, out-of-core terrain rendering, and information visualization. This thesis presents components for the creation of abstract representations of virtual 3D city and landscape models. Re-using visual language from cartography, the techniques enable users to build on their experience with maps when interpreting these representations.
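As an illustrative aside to the stepped-relief technique described above, the following small Python sketch shows the per-vertex height quantization to the nearest isovalue together with a simple height-to-color mapping. The isovalue spacing and the color ramp are invented for illustration, and the actual technique runs per vertex, triangle, and fragment on programmable graphics hardware rather than on the CPU.

def quantize_height(height, iso_spacing=10.0):
    # Snap a terrain height to the nearest isovalue (multiples of iso_spacing).
    return round(height / iso_spacing) * iso_spacing

def height_to_color(height, min_h=0.0, max_h=100.0):
    # Very simple height-color mapping: interpolate from green to white.
    t = max(0.0, min(1.0, (height - min_h) / (max_h - min_h)))
    green = (0.2, 0.6, 0.2)
    white = (1.0, 1.0, 1.0)
    return tuple(g + t * (w - g) for g, w in zip(green, white))

vertex_heights = [3.7, 12.2, 47.9, 81.4]
for h in vertex_heights:
    q = quantize_height(h)
    print(h, "->", q, height_to_color(q))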
Simultaneously, characteristics of 3D geovirtual environments are taken into account by addressing and discussing, e.g., continuous scale, interaction, and perspective.}, language = {en} } @unpublished{GrapentinHeidlerKorschetal.2014, author = {Grapentin, Andreas and Heidler, Kirstin and Korsch, Dimitri and Kumar Sah, Rakesh and Kunzmann, Nicco and Henning, Johannes and Mattis, Toni and Rein, Patrick and Seckler, Eric and Groneberg, Bj{\"o}rn and Zimmermann, Florian}, title = {Embedded operating system projects}, number = {90}, editor = {Hentschel, Uwe and Richter, Daniel and Polze, Andreas}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-296-4}, issn = {1613-5652}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-69154}, pages = {xi, 87}, year = {2014}, abstract = {In today's life, embedded systems are ubiquitous. But they differ from traditional desktop systems in many aspects - these include predictable timing behavior (real-time), the management of scarce resources (memory, network), reliable communication protocols, energy management, special purpose user-interfaces (headless operation), system configuration, programming languages (to support software/hardware co-design), and modeling techniques. Within this technical report, the authors present results from the lecture "Operating Systems for Embedded Computing" that has been offered by the "Operating Systems and Middleware" group at HPI in Winter term 2013/14. Focus of the lecture and accompanying projects was on principles of real-time computing. Students had the chance to gather practical experience with a number of different OSes and applications and present experiences with near-hardware programming. Projects address the entire spectrum, from bare-metal programming to harnessing a real-time OS to exercising the full software/hardware co-design cycle. Three outstanding projects are at the heart of this technical report. Project 1 focuses on the development of a bare-metal operating system for LEGO Mindstorms EV3. While still a toy, it comes with a powerful ARM processor, 64 MB of main memory, and standard interfaces such as Bluetooth and network protocol stacks. EV3 runs a version of Linux. Sources are available from Lego's web site. However, many devices and their driver software are proprietary and not well documented. Developing a new, bare-metal OS for the EV3 requires an understanding of the EV3 boot process. Since no standard input/output devices are available, initial debugging steps are tedious. After managing these initial steps, the project was able to adapt device drivers for a few Lego devices to an extent that a demonstrator (the Segway application) could be successfully run on the new OS. Project 2 looks at the EV3 from a different angle. The EV3 is running a pretty decent version of Linux - in principle, the RT_PREEMPT patch can turn any Linux system into a real-time OS by modifying the behavior of a number of synchronization constructs at the heart of the OS. Priority inversion is a problem that is solved by protocols such as priority inheritance or priority ceiling. Real-time OSes implement at least one of the protocols. The central idea of the project was the comparison of non-real-time and real-time variants of Linux on the EV3 hardware. A task set that showed effects of priority inversion on standard EV3 Linux would operate flawlessly on the Linux version with the RT_PREEMPT patch applied. If only patching Lego's version of Linux were that easy...
Project 3 takes the notion of real-time computing more seriously. The application scenario was centered around our Carrera Digital 132 racetrack. Obtaining position information from the track, controlling individual cars, and detecting and modifying the Carrera Digital protocol required the design and implementation of custom controller hardware. What to implement in hardware, what in firmware, and what in application software - this was the central question addressed by the project.}, language = {en} } @book{GroeneKnoepfelKugeletal.2004, author = {Gr{\"o}ne, Bernhard and Kn{\"o}pfel, Andreas and Kugel, Rudolf and Schmidt, Oliver}, title = {The Apache Modeling Project}, isbn = {978-3-937786-14-8}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-33147}, publisher = {Universit{\"a}t Potsdam}, year = {2004}, abstract = {This document presents an introduction to the Apache HTTP Server, covering both an overview and implementation details. It presents results of the Apache Modelling Project done by research assistants and students of the Hasso-Plattner-Institute in 2001, 2002 and 2003. The Apache HTTP Server was used to introduce students to the application of the modeling technique FMC, a method that supports transporting knowledge about complex systems in the domain of information processing (software and hardware as well). After an introduction to HTTP servers in general, we will focus on protocols and web technology. Then we will discuss Apache, its operational environment and its extension capabilities - the module API. Finally, we will guide the reader through parts of the Apache source code and explain the most important pieces.}, language = {en} } @inproceedings{GruenerMuehleGayvoronskayaetal.2019, author = {Gr{\"u}ner, Andreas and M{\"u}hle, Alexander and Gayvoronskaya, Tatiana and Meinel, Christoph}, title = {A quantifiable trust model for blockchain-based identity management}, series = {IEEE 2018 International Congress on Cybermatics / 2018 IEEE Conferences on Internet of Things, Green Computing and Communications, cyber, physical and Social Computing, Smart Data, Blockchain, Computer and Information Technology}, booktitle = {IEEE 2018 International Congress on Cybermatics / 2018 IEEE Conferences on Internet of Things, Green Computing and Communications, cyber, physical and Social Computing, Smart Data, Blockchain, Computer and Information Technology}, publisher = {IEEE}, address = {New York}, isbn = {978-1-5386-7975-3}, doi = {10.1109/Cybermatics_2018.2018.00250}, pages = {1475 -- 1482}, year = {2019}, language = {en} } @article{GruenerMuehleMeinel2021, author = {Gr{\"u}ner, Andreas and M{\"u}hle, Alexander and Meinel, Christoph}, title = {ATIB}, series = {IEEE access : practical research, open solutions / Institute of Electrical and Electronics Engineers}, volume = {9}, journal = {IEEE access : practical research, open solutions / Institute of Electrical and Electronics Engineers}, publisher = {Institute of Electrical and Electronics Engineers}, address = {New York, NY}, issn = {2169-3536}, doi = {10.1109/ACCESS.2021.3116095}, pages = {138553 -- 138570}, year = {2021}, abstract = {Identity management is a principal component of securing online services. In the advancement of traditional identity management patterns, the identity provider remained a Trusted Third Party (TTP). The service provider and the user need to trust a particular identity provider for correct attributes, amongst other demands.
This paradigm changed with the invention of blockchain-based Self-Sovereign Identity (SSI) solutions that primarily focus on the users. SSI reduces the functional scope of the identity provider to an attribute provider while enabling attribute aggregation. Besides that, the development of new protocols that disregard established ones, together with a significantly fragmented landscape of SSI solutions, poses considerable challenges for adoption by service providers. We propose an Attribute Trust-enhancing Identity Broker (ATIB) to leverage the potential of SSI for trust-enhancing attribute aggregation. Furthermore, ATIB abstracts from a dedicated SSI solution and offers standard protocols. Therefore, it facilitates the adoption by service providers. Despite the brokered integration approach, we show that ATIB provides a high security posture. Additionally, ATIB does not compromise the ten foundational SSI principles for the users.}, language = {en} } @phdthesis{Gumienny2013, author = {Gumienny, Raja Carola}, title = {Understanding the adoption of digital whiteboard systems for collaborative design work}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-72417}, school = {Universit{\"a}t Potsdam}, year = {2013}, abstract = {User-centered design processes are the first choice when new interactive systems or services are developed to address real customer needs and provide a good user experience. Common tools for collecting user research data, conducting brainstorming sessions, or sketching ideas are whiteboards and sticky notes. They are ubiquitously available, and no technical or domain knowledge is necessary to use them. However, traditional pen-and-paper tools fall short when saving the content and sharing it with others unable to be in the same location. They are also missing further digital advantages such as searching or sorting content. Although research on digital whiteboard and sticky note applications has been conducted for over 20 years, these tools are not widely adopted in company contexts. While many research prototypes exist, they have not been used for an extended period of time in a real-world context. The goal of this thesis is to investigate what the enablers and obstacles for the adoption of digital whiteboard systems are. As an instrument for different studies, we developed the Tele-Board software system for collaborative creative work. Based on interviews, observations, and findings from former research, we tried to transfer the analog way of working to the digital world. Being a software system, Tele-Board can be used with a variety of hardware and does not depend on special devices. This feature became one of the main factors for adoption on a larger scale. In this thesis, I will present three studies on the use of Tele-Board with different user groups and foci. I will use a combination of research methods (laboratory case studies and data from field research) with the overall goal of finding out when a digital whiteboard system is used and in which cases not. Not surprisingly, the system is used and accepted if a user sees a main benefit that neither analog tools nor other applications can offer. However, I found that these perceived benefits are very different for each user and usage context. If a tool can be used in different ways and with different equipment, the chances of its adoption by a larger group increase. Tele-Board has now been in use for over 1.5 years in a global IT company in at least five countries with a constantly growing user base.
Its use, advantages, and disadvantages will be described based on 42 interviews and usage statistics from server logs. Through these insights and findings from laboratory case studies, I will present a detailed analysis of digital whiteboard use in different contexts with design implications for future systems.}, language = {en} } @phdthesis{Gustafson2013, author = {Gustafson, Sean}, title = {Imaginary Interfaces}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-68960}, school = {Universit{\"a}t Potsdam}, year = {2013}, abstract = {The size of a mobile device is primarily determined by the size of the touchscreen. As such, researchers have found that the way to achieve ultimate mobility is to abandon the screen altogether. These wearable devices are operated using hand gestures, voice commands or a small number of physical buttons. By abandoning the screen these devices also abandon the currently dominant spatial interaction style (such as tapping on buttons), because, seemingly, there is nothing to tap on. Unfortunately this design prevents users from transferring their learned interaction knowledge gained from traditional touchscreen-based devices. In this dissertation, I present Imaginary Interfaces, which return spatial interaction to screenless mobile devices. With these interfaces, users point and draw in the empty space in front of them or on the palm of their hands. While they cannot see the results of their interaction, they obtain some visual and tactile feedback by watching and feeling their hands interact. After introducing the concept of Imaginary Interfaces, I present two hardware prototypes that showcase two different forms of interaction with an imaginary interface, each with its own advantages: mid-air imaginary interfaces can be large and expressive, while palm-based imaginary interfaces offer an abundance of tactile features that encourage learning. Given that imaginary interfaces offer no visual output, one of the key challenges is to enable users to discover the interface's layout. This dissertation offers three main solutions: offline learning with coordinates, browsing with audio feedback and learning by transfer. The latter I demonstrate with the Imaginary Phone, a palm-based imaginary interface that mimics the layout of a physical mobile phone that users are already familiar with. Although these designs enable interaction with Imaginary Interfaces, they tell us little about why this interaction is possible. In the final part of this dissertation, I present an exploration into which human perceptual abilities are used when interacting with a palm-based imaginary interface and how much each accounts for performance with the interface. 
These findings deepen our understanding of Imaginary Interfaces and suggest that palm-based Imaginary Interfaces can enable stand-alone eyes-free use for many applications, including interfaces for visually impaired users.}, language = {en} } @article{GoebelLagodzinskiSeidel2021, author = {G{\"o}bel, Andreas and Lagodzinski, Julius Albert Gregor and Seidel, Karen}, title = {Counting homomorphisms to trees modulo a prime}, series = {ACM transactions on computation theory : TOCT / Association for Computing Machinery}, volume = {13}, journal = {ACM transactions on computation theory : TOCT / Association for Computing Machinery}, number = {3}, publisher = {Association for Computing Machinery}, address = {New York}, issn = {1942-3454}, doi = {10.1145/3460958}, pages = {1 -- 33}, year = {2021}, abstract = {Many important graph-theoretic notions can be encoded as counting graph homomorphism problems, such as partition functions in statistical physics, in particular independent sets and colourings. In this article, we study the complexity of \#(p) HOMSTOH, the problem of counting graph homomorphisms from an input graph to a graph H modulo a prime number p. Dyer and Greenhill proved a dichotomy stating that the tractability of non-modular counting graph homomorphisms depends on the structure of the target graph. Many intractable cases in non-modular counting become tractable in modular counting due to the common phenomenon of cancellation. In subsequent studies on counting modulo 2, however, the influence of the structure of H on the tractability was shown to persist, which yields similar dichotomies.
Our main result states that for every tree H and every prime p the problem \#pHOMSTOH is either polynomial time computable or \#P-p-complete. This relates to the conjecture of Faben and Jerrum stating that this dichotomy holds for every graph H when counting modulo 2. In contrast to previous results on modular counting, the tractable cases of \#pHOMSTOH are essentially the same for all values of the modulo when H is a tree. To prove this result, we study the structural properties of a homomorphism. As an important interim result, our study yields a dichotomy for the problem of counting weighted independent sets in a bipartite graph modulo some prime p. These results are the first suggesting that such dichotomies hold not only for the modulo 2 case but also for the modular counting functions of all primes p.}, language = {en} } @article{HaarmannHolfterPufahletal.2021, author = {Haarmann, Stephan and Holfter, Adrian and Pufahl, Luise and Weske, Mathias}, title = {Formal framework for checking compliance of data-driven case management}, series = {Journal on data semantics : JoDS}, volume = {10}, journal = {Journal on data semantics : JoDS}, number = {1-2}, publisher = {Springer}, address = {Heidelberg}, issn = {1861-2032}, doi = {10.1007/s13740-021-00120-3}, pages = {143 -- 163}, year = {2021}, abstract = {Business processes are often specified in descriptive or normative models. Both types of models should adhere to internal and external regulations, such as company guidelines or laws. Employing compliance checking techniques, it is possible to verify process models against rules. While traditionally compliance checking focuses on well-structured processes, we address case management scenarios. In case management, knowledge workers drive multi-variant and adaptive processes. Our contribution is based on the fragment-based case management approach, which splits a process into a set of fragments. The fragments are synchronized through shared data but can, otherwise, be dynamically instantiated and executed. We formalize case models using Petri nets. We demonstrate the formalization for design-time and run-time compliance checking and present a proof-of-concept implementation. The application of the implemented compliance checking approach to a use case exemplifies its effectiveness while designing a case model. The empirical evaluation on a set of case models for measuring the performance of the approach shows that rules can often be checked in less than a second.}, language = {en} } @article{HackerKrestelGrundmannetal.2020, author = {Hacker, Philipp and Krestel, Ralf and Grundmann, Stefan and Naumann, Felix}, title = {Explainable AI under contract and tort law}, series = {Artificial intelligence and law}, volume = {28}, journal = {Artificial intelligence and law}, number = {4}, publisher = {Springer}, address = {Dordrecht}, issn = {0924-8463}, doi = {10.1007/s10506-020-09260-6}, pages = {415 -- 439}, year = {2020}, abstract = {This paper shows that the law, in subtle ways, may set hitherto unrecognized incentives for the adoption of explainable machine learning applications. In doing so, we make two novel contributions. First, on the legal side, we show that to avoid liability, professional actors, such as doctors and managers, may soon be legally compelled to use explainable ML models. We argue that the importance of explainability reaches far beyond data protection law, and crucially influences questions of contractual and tort liability for the use of ML models. 
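As an illustrative aside to the modular counting entry above, the following brute-force Python sketch counts homomorphisms from an input graph G to a fixed graph H modulo a prime p by enumerating all vertex mappings and checking edge preservation. It only illustrates the problem definition; its running time is exponential, unlike the polynomial-time cases characterized in the article, and the toy graphs below are invented for illustration.

from itertools import product

def count_homomorphisms_mod_p(G_vertices, G_edges, H_vertices, H_edges, p):
    # Count maps f: V(G) -> V(H) with (f(u), f(v)) an edge of H for every
    # edge (u, v) of G, reduced modulo the prime p.
    H_adj = set()
    for (a, b) in H_edges:          # H is treated as undirected
        H_adj.add((a, b))
        H_adj.add((b, a))
    count = 0
    for assignment in product(H_vertices, repeat=len(G_vertices)):
        f = dict(zip(G_vertices, assignment))
        if all((f[u], f[v]) in H_adj for (u, v) in G_edges):
            count = (count + 1) % p
    return count

# Toy example: G is a path a-b-c, H is the path 0-1-2 (a tree).
G_vertices = ["a", "b", "c"]
G_edges = [("a", "b"), ("b", "c")]
H_vertices = [0, 1, 2]
H_edges = [(0, 1), (1, 2)]
print(count_homomorphisms_mod_p(G_vertices, G_edges, H_vertices, H_edges, p=3))  # 6 mod 3 = 0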
To this effect, we conduct two legal case studies, in medical and corporate merger applications of ML. As a second contribution, we discuss the (legally required) trade-off between accuracy and explainability and demonstrate the effect in a technical case study in the context of spam classification.}, language = {en} } @book{HagedornSchoebelUflackeretal.2007, author = {Hagedorn, Benjamin and Sch{\"o}bel, Michael and Uflacker, Matthias and Copaciu, Flavius and Milanovic, Nikola}, title = {Proceedings of the fall 2006 workshop of the HPI research school on service-oriented systems engineering}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-939469-58-2}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-33052}, publisher = {Universit{\"a}t Potsdam}, pages = {Getr. Z{\"a}hlung}, year = {2007}, abstract = {1. Design and Composition of 3D Geoinformation Services Benjamin Hagedorn 2. Operating System Abstractions for Service-Based Systems Michael Sch{\"o}bel 3. A Task-oriented Approach to User-centered Design of Service-Based Enterprise Applications Matthias Uflacker 4. A Framework for Adaptive Transport in Service-Oriented Systems based on Performance Prediction Flavius Copaciu 5. Asynchronicity and Loose Coupling in Service-Oriented Architectures Nikola Milanovic}, language = {en} } @inproceedings{HannousseArdourelDouence2010, author = {Hannousse, Abdelhakim and Ardourel, Gilles and Douence, R{\´e}mi}, title = {Views for aspectualizing component models}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-41359}, year = {2010}, abstract = {Component-based software development (CBSD) and aspect-oriented software development (AOSD) are two complementary approaches. However, existing proposals for integrating aspects into component models are direct transpositions of object-oriented AOSD techniques to components. In this article, we propose a new approach based on views. Our proposal introduces crosscutting components quite naturally and can be integrated into different component models.}, language = {en} } @inproceedings{Harrison2010, author = {Harrison, William}, title = {Malleability, obliviousness and aspects for broadcast service attachment}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-41389}, year = {2010}, abstract = {An important characteristic of Service-Oriented Architectures is that clients do not depend on the service implementation's internal assignment of methods to objects. It is perhaps the most important technical characteristic that differentiates them from more common object-oriented solutions. This characteristic makes clients and services malleable, allowing them to be rearranged at run-time as circumstances change. That improvement in malleability is impaired by requiring clients to direct service requests to particular services. Ideally, the clients are totally oblivious to the service structure, as they are to aspect structure in aspect-oriented software. Removing knowledge of a method implementation's location, whether in object or service, requires re-defining the boundary line between programming language and middleware, making clearer specification of dependence on protocols, and bringing the transaction-like concept of failure scopes into language semantics as well. 
This paper explores consequences and advantages of a transition from object-request brokering to service-request brokering, including the potential to improve our ability to write more parallel software.}, language = {en} } @book{HauptMarrHirschfeld2011, author = {Haupt, Michael and Marr, Stefan and Hirschfeld, Robert}, title = {CSOM/PL : a virtual machine product line}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-134-9}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-52332}, publisher = {Universit{\"a}t Potsdam}, pages = {26}, year = {2011}, abstract = {CSOM/PL is a software product line (SPL) derived from applying multi-dimensional separation of concerns (MDSOC) techniques to the domain of high-level language virtual machine (VM) implementations. For CSOM/PL, we modularised CSOM, a Smalltalk VM implemented in C, using VMADL (virtual machine architecture description language). Several features of the original CSOM were encapsulated in VMADL modules and composed in various combinations. In an evaluation of our approach, we show that applying MDSOC and SPL principles to a domain as complex as that of VMs is not only feasible but beneficial, as it improves understandability, maintainability, and configurability of VM implementations without harming performance.}, language = {en} } @phdthesis{Hebig2014, author = {Hebig, Regina}, title = {Evolution of model-driven engineering settings in practice}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-70761}, school = {Universit{\"a}t Potsdam}, year = {2014}, abstract = {Nowadays, software systems are getting more and more complex. To tackle this challenge, a variety of techniques, such as design patterns, service-oriented architectures (SOA), software development processes, and model-driven engineering (MDE), are used to improve productivity while time to market and quality of the products stay stable. Several of these techniques are used in parallel to profit from their benefits. While the use of sophisticated software development processes is standard today, MDE is only just being adopted in practice. However, research has shown that the application of MDE is not always successful. It is not fully understood when the advantages of MDE can be realized and to what degree MDE can also be disadvantageous for productivity. Further, when combining different techniques that aim to affect the same factor (e.g., productivity), the question arises whether these techniques really complement each other or, in contrast, cancel out each other's effects. This raises the concrete question of how MDE and other techniques, such as software development processes, are interrelated. Both aspects (advantages and disadvantages for productivity as well as the interrelation to other techniques) need to be understood to identify risks relating to the productivity impact of MDE. Before studying MDE's impact on productivity, it is necessary to investigate the range of validity that can be reached for the results. This includes two questions. First, there is the question whether MDE's impact on productivity is similar for all approaches of adopting MDE in practice. Second, there is the question whether MDE's impact on productivity for an approach of using MDE in practice remains stable over time. The answers to both questions are crucial for handling risks of MDE, but also for the design of future studies on MDE success. This thesis addresses these questions with the goal of supporting the adoption of MDE in the future. 
To enable a differentiated discussion about MDE, the term ``MDE setting'' is introduced. An MDE setting refers to the applied technical setting, i.e., the employed manual and automated activities, artifacts, languages, and tools. An MDE setting's possible impact on productivity is studied with a focus on changeability and the interrelation to software development processes. This is done by introducing a taxonomy of changeability concerns that might be affected by an MDE setting. Further, three MDE traits are identified, and it is studied for which manifestations of these MDE traits software development processes are impacted. To enable the assessment and evaluation of an MDE setting's impacts, the Software Manufacture Model language is introduced. This is a process modeling language that allows reasoning about how relations between (modeling) artifacts (e.g., models or code files) change during application of manual or automated development activities. On that basis, risk analysis techniques are provided. These techniques allow identifying changeability risks and assessing the manifestations of the MDE traits (and with them an MDE setting's impact on software development processes). To address the range of validity, MDE settings from practice and their evolution histories were captured in the context of this thesis. First, this data is used to show that MDE settings cover the whole spectrum concerning their impact on changeability or interrelation to software development processes. It is neither seldom that MDE settings are neutral for processes nor that they have an impact on processes. Similarly, the impact on changeability differs considerably. Second, a taxonomy of evolution of MDE settings is introduced. In that context, it is discussed to what extent different types of changes to an MDE setting can influence this MDE setting's impact on changeability and the interrelation to processes. The category of structural evolution, which can change these characteristics of an MDE setting, is identified. The captured MDE settings from practice are used to show that structural evolution exists and is common. In addition, some examples of structural evolution steps are collected that actually led to a change in the characteristics of the respective MDE settings. Two implications are: First, the assessed diversity of MDE settings confirms the need for the analysis techniques presented in this thesis. Second, evolution is one explanation for the diversity of MDE settings in practice. To summarize, this thesis studies the nature and evolution of MDE settings in practice. As a result, support for the adoption of MDE settings is provided in the form of techniques for the identification of risks relating to productivity impacts.}, language = {en} } @book{HebigGiese2012, author = {Hebig, Regina and Giese, Holger}, title = {MDE settings in SAP : a descriptive field study}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-192-9}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-60193}, publisher = {Universit{\"a}t Potsdam}, pages = {64}, year = {2012}, abstract = {MDE techniques are more and more used in praxis. However, there is currently a lack of detailed reports about how different MDE techniques are integrated into the development and combined with each other. 
To learn more about such MDE settings, we performed a descriptive and exploratory field study with SAP, a globally operating company with around 50,000 employees that builds enterprise software applications. This technical report describes insights we gained during this study. For example, we identified that MDE settings are subject to evolution. Finally, this report outlines directions for future research to provide practical advice for the application of MDE settings.}, language = {en} } @book{HebigGieseBatoulisetal.2015, author = {Hebig, Regina and Giese, Holger and Batoulis, Kimon and Langer, Philipp and Zamani Farahani, Armin and Yao, Gary and Wolowyk, Mychajlo}, title = {Development of AUTOSAR standard documents at Carmeq GmbH}, number = {92}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-317-6}, issn = {1613-5652}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-71535}, publisher = {Universit{\"a}t Potsdam}, pages = {52}, year = {2015}, abstract = {This report documents the captured MDE history of Carmeq GmbH, in the context of the project Evolution of MDE Settings in Practice. The goal of the project is the elicitation of MDE approaches and their evolution.}, language = {en} } @phdthesis{Heise2014, author = {Heise, Arvid}, title = {Data cleansing and integration operators for a parallel data analytics platform}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-77100}, school = {Universit{\"a}t Potsdam}, pages = {ii, 179}, year = {2014}, abstract = {The data quality of real-world datasets needs to be constantly monitored and maintained to allow organizations and individuals to reliably use their data. In particular, data integration projects suffer from poor initial data quality and as a consequence consume more effort and money. Commercial products and research prototypes for data cleansing and integration help users to improve the quality of individual and combined datasets. They can be divided into either standalone systems or database management system (DBMS) extensions. On the one hand, standalone systems do not interact well with DBMS and require time-consuming data imports and exports. On the other hand, DBMS extensions are often limited by the underlying system and do not cover the full set of data cleansing and integration tasks. We overcome both limitations by implementing a concise set of five data cleansing and integration operators on the parallel data analytics platform Stratosphere. We define the semantics of the operators, present their parallel implementation, and devise optimization techniques for individual operators and combinations thereof. Users specify declarative queries in our query language METEOR with our new operators to improve the data quality of individual datasets or integrate them into larger datasets. By integrating the data cleansing operators into the higher-level language layer of Stratosphere, users can easily combine cleansing operators with operators from other domains, such as information extraction, into complex data flows. Through a generic description of the operators, the Stratosphere optimizer reorders operators even from different domains to find better query plans. As a case study, we reimplemented a part of the large Open Government Data integration project GovWILD with our new operators and show that our queries run significantly faster than the original GovWILD queries, which rely on relational operators. 
Evaluation reveals that our operators exhibit good scalability on up to 100 cores, so that even larger inputs can be efficiently processed by scaling out to more machines. Finally, our scripts are considerably shorter than the original GovWILD scripts, which results in better maintainability of the scripts.}, language = {en} } @book{HerschelNaumann2008, author = {Herschel, Melanie and Naumann, Felix}, title = {Space and time scalability of duplicate detection in graph data}, isbn = {978-3-940793-46-1}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-32851}, publisher = {Universit{\"a}t Potsdam}, year = {2008}, abstract = {Duplicate detection consists in determining different representations of real-world objects in a database. Recent research has considered the use of relationships among object representations to improve duplicate detection. In the general case where relationships form a graph, research has mainly focused on duplicate detection quality/effectiveness. Scalability has been neglected so far, even though it is crucial for large real-world duplicate detection tasks. In this paper, we scale up duplicate detection in graph data (DDG) to large amounts of data and pairwise comparisons, using the support of a relational database system. To this end, we first generalize the process of DDG. We then present how to scale algorithms for DDG in space (amount of data processed with limited main memory) and in time. Finally, we explore how complex similarity computation can be performed efficiently. Experiments on data an order of magnitude larger than data considered so far in DDG clearly show that our methods scale to large amounts of data not residing in main memory.}, language = {en} } @misc{HesseMatthiesSinzigetal.2019, author = {Hesse, G{\"u}nter and Matthies, Christoph and Sinzig, Werner and Uflacker, Matthias}, title = {Adding Value by Combining Business and Sensor Data}, series = {Database Systems for Advanced Applications}, volume = {11448}, journal = {Database Systems for Advanced Applications}, publisher = {Springer}, address = {Cham}, isbn = {978-3-030-18590-9}, issn = {0302-9743}, doi = {10.1007/978-3-030-18590-9_80}, pages = {528 -- 532}, year = {2019}, abstract = {Industry 4.0 and the Internet of Things are recent developments that have led to the creation of new kinds of manufacturing data. Linking this new kind of sensor data to traditional business information is crucial for enterprises to take advantage of the data's full potential. In this paper, we present a demo which allows experiencing this data integration, both vertically between technical and business contexts and horizontally along the value chain. The tool simulates a manufacturing company, continuously producing both business and sensor data, and supports issuing ad-hoc queries that answer specific questions related to the business. In order to adapt to different environments, users can configure sensor characteristics to their needs.}, language = {en} } @phdthesis{Holz2013, author = {Holz, Christian}, title = {3D from 2D touch}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-67796}, school = {Universit{\"a}t Potsdam}, year = {2013}, abstract = {While interaction with computers used to be dominated by mice and keyboards, new types of sensors now allow users to interact through touch, speech, or using their whole body in 3D space. These new interaction modalities are often referred to as "natural user interfaces" or "NUIs." 
While 2D NUIs have experienced major success on billions of mobile touch devices sold, 3D NUI systems have so far been unable to deliver a mobile form factor, mainly due to their use of cameras. The fact that cameras require a certain distance from the capture volume has prevented 3D NUI systems from reaching the flat form factor mobile users expect. In this dissertation, we address this issue by sensing 3D input using flat 2D sensors. The systems we present observe the input from 3D objects as 2D imprints upon physical contact. By sampling these imprints at very high resolutions, we obtain the objects' textures. In some cases, a texture uniquely identifies a biometric feature, such as the user's fingerprint. In other cases, an imprint stems from the user's clothing, such as when walking on multitouch floors. By analyzing from which part of the 3D object the 2D imprint results, we reconstruct the object's pose in 3D space. While our main contribution is a general approach to sensing 3D input on 2D sensors upon physical contact, we also demonstrate three applications of our approach. (1) We present high-accuracy touch devices that allow users to reliably touch targets that are a third of the size of those on current touch devices. We show that different users and 3D finger poses systematically affect touch sensing, which current devices perceive as random input noise. We introduce a model for touch that compensates for this systematic effect by deriving the 3D finger pose and the user's identity from each touch imprint. We then investigate this systematic effect in detail and explore how users conceptually touch targets. Our findings indicate that users aim by aligning visual features of their fingers with the target. We present a visual model for touch input that eliminates virtually all systematic effects on touch accuracy. (2) From each touch, we identify users biometrically by analyzing their fingerprints. Our prototype Fiberio integrates fingerprint scanning and a display into the same flat surface, solving a long-standing problem in human-computer interaction: secure authentication on touchscreens. Sensing 3D input and authenticating users upon touch allows Fiberio to implement a variety of applications that traditionally require the bulky setups of current 3D NUI systems. (3) To demonstrate the versatility of 3D reconstruction on larger touch surfaces, we present a high-resolution pressure-sensitive floor that resolves the texture of objects upon touch. Using the same principles as before, our system GravitySpace analyzes all imprints and identifies users based on their shoe soles, detects furniture, and enables accurate touch input using feet. By classifying all imprints, GravitySpace detects the users' body parts that are in contact with the floor and then reconstructs their 3D body poses using inverse kinematics. GravitySpace thus enables a range of applications for future 3D NUI systems based on a flat sensor, such as smart rooms in future homes. We conclude this dissertation by projecting into the future of mobile devices. 
Focusing on the mobility aspect of our work, we explore how NUI devices may one day augment users directly in the form of implanted devices.}, language = {en} } @book{HuCordelMeinel2006, author = {Hu, Ji and Cordel, Dirk and Meinel, Christoph}, title = {A virtual machine architecture for creating IT-security laboratories}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-939469-13-1}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-33077}, publisher = {Universit{\"a}t Potsdam}, pages = {50}, year = {2006}, abstract = {E-learning is a flexible and personalized alternative to traditional education. Nonetheless, existing e-learning systems for IT security education have difficulties in delivering hands-on experience because of the lack of proximity. Laboratory environments and practical exercises are indispensable instruction tools for IT security education, but security education in conventional computer laboratories poses the problem of immobility as well as high creation and maintenance costs. Hence, there is a need to effectively transform security laboratories and practical exercises into e-learning forms. This report introduces the Tele-Lab IT-Security architecture that allows students not only to learn IT security principles, but also to gain hands-on security experience through exercises in an online laboratory environment. In this architecture, virtual machines are used to provide safe user work environments instead of real computers. Thus, traditional laboratory environments can be cloned onto the Internet by software, which increases accessibility to laboratory resources and greatly reduces investment and maintenance costs. Under the Tele-Lab IT-Security framework, a set of technical solutions is also proposed to provide effective functionalities, reliability, security, and performance. The virtual machines with appropriate resource allocation, software installation, and system configurations are used to build lightweight security laboratories on a hosting computer. Reliability and availability of laboratory platforms are covered by the virtual machine management framework. This management framework provides necessary monitoring and administration services to detect and recover from critical failures of virtual machines at run time. Considering the risk that virtual machines can be misused for compromising production networks, we present security management solutions to prevent misuse of laboratory resources by security isolation at the system and network levels. This work is an attempt to bridge the gap between e-learning/tele-teaching and practical IT security education. It is not intended to substitute conventional teaching in laboratories but to add practical features to e-learning. This report demonstrates the possibility of implementing hands-on security laboratories on the Internet reliably, securely, and economically.}, language = {en} } @article{KaitouaRablMarkl2020, author = {Kaitoua, Abdulrahman and Rabl, Tilmann and Markl, Volker}, title = {A distributed data exchange engine for polystores}, series = {Information technology : methods and applications of informatics and information technology}, volume = {62}, journal = {Information technology : methods and applications of informatics and information technology}, number = {3-4}, publisher = {De Gruyter}, address = {Berlin}, issn = {1611-2776}, doi = {10.1515/itit-2019-0037}, pages = {145 -- 156}, year = {2020}, abstract = {There is an increasing interest in fusing data from heterogeneous sources. 
Combining data sources increases the utility of existing datasets, generating new information and creating services of higher quality. A central issue in working with heterogeneous sources is data migration: In order to share and process data in different engines, resource-intensive and complex movements and transformations between computing engines, services, and stores are necessary. Muses is a distributed, high-performance data migration engine that is able to interconnect distributed data stores by forwarding, transforming, repartitioning, or broadcasting data among distributed engines' instances in a resource-, cost-, and performance-adaptive manner. As such, it performs seamless information sharing across all participating resources in a standard, modular manner. We show an overall improvement of 30\% for pipelining jobs across multiple engines, even when we count the overhead of Muses in the execution time. This performance gain implies that Muses can be used to optimise large pipelines that leverage multiple engines.}, language = {en} } @book{KlauckMaschlerTausche2017, author = {Klauck, Stefan and Maschler, Fabian and Tausche, Karsten}, title = {Proceedings of the Fourth HPI Cloud Symposium "Operating the Cloud" 2016}, number = {117}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-401-2}, issn = {1613-5652}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-394513}, publisher = {Universit{\"a}t Potsdam}, pages = {32}, year = {2017}, abstract = {Every year, the Hasso Plattner Institute (HPI) invites guests from industry and academia to a collaborative scientific workshop on the topic "Operating the Cloud". Our goal is to provide a forum for the exchange of knowledge and experience between industry and academia. Co-located with the event is the HPI's Future SOC Lab day, which offers an additional attractive and conducive environment for scientific and industry-related discussions. "Operating the Cloud" aims to be a platform for productive interactions of innovative ideas, visions, and upcoming technologies in the field of cloud operation and administration. On the occasion of this symposium, we called for submissions of research papers and practitioners' reports. A compilation of the research papers realized during the fourth HPI cloud symposium "Operating the Cloud" 2016 is published in these proceedings. We thank the authors for exciting presentations and insights into their current work and research. Moreover, we look forward to more interesting submissions for the upcoming symposium later in the year.
"Operating the Cloud" aims to be a platform for productive interactions of innovative ideas, visions, and upcoming technologies in the field of cloud operation and administration.}, language = {en} } @book{KleineHirschfeldBracha2011, author = {Kleine, Matthias and Hirschfeld, Robert and Bracha, Gilad}, title = {An abstraction for version control systems}, series = {Technische Berichte des Hasso-Plattner-Instituts f{\"u}r Softwaresystemtechnik an der Universit{\"a}t Potsdam}, journal = {Technische Berichte des Hasso-Plattner-Instituts f{\"u}r Softwaresystemtechnik an der Universit{\"a}t Potsdam}, number = {54}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-158-5}, issn = {1613-5652}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-55629}, publisher = {Universit{\"a}t Potsdam}, pages = {77}, year = {2011}, abstract = {Versionsverwaltungssysteme (VCS) erm{\"o}glichen es Entwicklern, {\"A}nderungen an Softwareartifakten zu verwalten. VCS werden mit Hilfe einer Vielzahl verschiedener Werkzeuge bedient, wie z.\,B. graphische Front-ends oder Kommandozeilenwerkzeuge. Es ist w{\"u}nschenswert mit einzelnen solcher Werkzeuge unterschiedliche VCS bedienen zu k{\"o}nnen. Bislang hat sich jedoch keine Abstraktion f{\"u}r Versionsverwaltungssysteme durchgesetzt, mit deren Hilfe solche Werkzeuge erstellt werden k{\"o}nnen. Stattdessen implementieren Werkzeuge zur Interaktion mit mehreren VCS ad-hoc L{\"o}sungen. Diese Masterarbeit stellt Pur vor, eine Abstraktion {\"u}ber Versionsverwaltungskonzepte. Mit Hilfe von Pur k{\"o}nnen Anwendungsprogramme entwickelt werden, die mit mehreren Versionsverwaltungssystemen interagieren k{\"o}nnen. Im Rahmen dieser Arbeit wird eine Implementierung dieser Abstraktion bereitgestellt und mit Hilfe eines Anwendungsprogramms validiert.}, language = {en} } @phdthesis{Kluth2011, author = {Kluth, Stephan}, title = {Quantitative modeling and analysis with FMC-QE}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-52987}, school = {Universit{\"a}t Potsdam}, year = {2011}, abstract = {The modeling and evaluation calculus FMC-QE, the Fundamental Modeling Concepts for Quanti-tative Evaluation [1], extends the Fundamental Modeling Concepts (FMC) for performance modeling and prediction. In this new methodology, the hierarchical service requests are in the main focus, because they are the origin of every service provisioning process. Similar to physics, these service requests are a tuple of value and unit, which enables hierarchical service request transformations at the hierarchical borders and therefore the hierarchical modeling. Through reducing the model complexity of the models by decomposing the system in different hierarchical views, the distinction between operational and control states and the calculation of the performance values on the assumption of the steady state, FMC-QE has a scalable applica-bility on complex systems. According to FMC, the system is modeled in a 3-dimensional hierarchical representation space, where system performance parameters are described in three arbitrarily fine-grained hierarchi-cal bipartite diagrams. The hierarchical service request structures are modeled in Entity Relationship Diagrams. The static server structures, divided into logical and real servers, are de-scribed as Block Diagrams. The dynamic behavior and the control structures are specified as Petri Nets, more precisely Colored Time Augmented Petri Nets. 
From the structures and parameters of the performance model, a hierarchical set of equations is derived. The calculation of the performance values is done on the assumption of stationary processes and is based on fundamental laws of performance analysis: Little's Law and the Forced Traffic Flow Law. Little's Law is used within the different hierarchical levels (horizontal), and the Forced Traffic Flow Law is the key to the dependencies among the hierarchical levels (vertical). This calculation is suitable for complex models and allows a fast (re-)calculation of different performance scenarios in order to support development and configuration decisions. Within the Research Group Zorn at the Hasso Plattner Institute, the work is embedded in broader research on the development of FMC-QE. While this work concentrates on the theoretical background, description, and definition of the methodology as well as the extension and validation of the applicability, other topics concern the development of an FMC-QE modeling and evaluation tool and the usage of FMC-QE in the design of an adaptive transport layer in order to fulfill Quality of Service and Service Level Agreements in volatile service-based environments. This thesis contains a state-of-the-art survey, the description of FMC-QE, as well as extensions of FMC-QE in representative general models and case studies. In the state-of-the-art part of the thesis in chapter 2, an overview of existing Queueing Theory and Time Augmented Petri Net models and other quantitative modeling and evaluation languages and methodologies is given. Other hierarchical quantitative modeling frameworks are also considered. The description of FMC-QE in chapter 3 consists of a summary of the foundations of FMC-QE, basic definitions, the graphical notations, the FMC-QE Calculus, and the modeling of open queueing networks as an introductory example. The extensions of FMC-QE in chapter 4 consist of the integration of the summation method in order to support the handling of closed networks and the modeling of multiclass and semaphore scenarios. Furthermore, FMC-QE is compared to other performance modeling and evaluation approaches. In the case study part in chapter 5, proof-of-concept examples, such as the modeling of a service-based search portal, a service-based SAP NetWeaver application, and the Axis2 Web service framework, are provided. Finally, conclusions are given by a summary of contributions and an outlook on future work in chapter 6. [1] Werner Zorn. FMC-QE - A New Approach in Quantitative Modeling. In Hamid R. Arabnia, editor, Proceedings of the International Conference on Modeling, Simulation and Visualization Methods (MSV 2007) within WorldComp '07, pages 280-287, Las Vegas, NV, USA, June 2007. CSREA Press. ISBN 1-60132-029-9.}, language = {en} } @article{KossmannHalfpapJankriftetal.2020, author = {Kossmann, Jan and Halfpap, Stefan and Jankrift, Marcel and Schlosser, Rainer}, title = {Magic mirror in my hand, which is the best in the land?}, series = {Proceedings of the VLDB Endowment}, volume = {13}, journal = {Proceedings of the VLDB Endowment}, number = {11}, publisher = {Association for Computing Machinery}, address = {New York}, issn = {2150-8097}, doi = {10.14778/3407790.3407832}, pages = {2382 -- 2395}, year = {2020}, abstract = {Indexes are essential for the efficient processing of database workloads. 
Proposed solutions for the relevant and challenging index selection problem range from simple metadata-based heuristics, through sophisticated multi-step algorithms, to approaches that yield optimal results. The main challenges are (i) to accurately determine the effect of an index on the workload cost while considering the interaction of indexes and (ii) to cope with the large number of possible combinations resulting from workloads containing many queries and massive schemata with possibly thousands of attributes.
In this work, we describe and analyze eight index selection algorithms that are based on different concepts and compare them along different dimensions, such as solution quality, runtime, multi-column support, solution granularity, and complexity. In particular, we analyze the solutions of the algorithms for the challenging analytical Join Order, TPC-H, and TPC-DS benchmarks. Afterward, we assess their strengths and weaknesses and infer insights for index selection in general and for each approach individually, before giving recommendations on when to use which approach.}, language = {en} } @article{KossmannSchlosser2020, author = {Kossmann, Jan and Schlosser, Rainer}, title = {Self-driving database systems}, series = {Distributed and parallel databases}, volume = {38}, journal = {Distributed and parallel databases}, number = {4}, publisher = {Springer}, address = {Dordrecht}, issn = {0926-8782}, doi = {10.1007/s10619-020-07288-w}, pages = {795 -- 817}, year = {2020}, abstract = {Challenges for self-driving database systems, which tune their physical design and configuration autonomously, are manifold: Such systems have to anticipate future workloads, find robust configurations efficiently, and incorporate knowledge gained by previous actions into later decisions. We present a component-based framework for self-driving database systems that enables database integration and development of self-managing functionality with low overhead by relying on separation of concerns. By keeping the components of the framework reusable and exchangeable, experiments are simplified, which promotes further research in that area. Moreover, to optimize multiple mutually dependent features, e.g., index selection and compression configurations, we propose a linear programming (LP) based algorithm to derive an efficient tuning order automatically. Afterwards, we demonstrate the applicability and scalability of our approach with reproducible examples.}, language = {en} } @article{KoumarelasJiangNaumann2020, author = {Koumarelas, Ioannis and Jiang, Lan and Naumann, Felix}, title = {Data preparation for duplicate detection}, series = {Journal of data and information quality : (JDIQ)}, volume = {12}, journal = {Journal of data and information quality : (JDIQ)}, number = {3}, publisher = {Association for Computing Machinery}, address = {New York}, issn = {1936-1955}, doi = {10.1145/3377878}, pages = {24}, year = {2020}, abstract = {Data errors represent a major issue in most application workflows. Before any important task can take place, a certain data quality has to be guaranteed by eliminating a number of different errors that may appear in data. Typically, most of these errors are fixed with data preparation methods, such as whitespace removal. However, the particular error of duplicate records, where multiple records refer to the same entity, is usually eliminated independently with specialized techniques. Our work is the first to bring these two areas together by applying data preparation operations under a systematic approach prior to performing duplicate detection.
Our process workflow can be summarized as follows: It begins with the user providing as input a sample of the gold standard, the actual dataset, and optionally some constraints to domain-specific data preparations, such as address normalization. The preparation selection operates in two consecutive phases. First, to vastly reduce the search space of ineffective data preparations, decisions are made based on the improvement or worsening of pair similarities. Second, using the remaining data preparations, an iterative leave-one-out classification process removes preparations one by one and determines the redundant preparations based on the achieved area under the precision-recall curve (AUC-PR). Using this workflow, we manage to improve the results of duplicate detection by up to 19\% in AUC-PR.}, language = {en} } @article{KossmannPapenbrockNaumann2021, author = {Koßmann, Jan and Papenbrock, Thorsten and Naumann, Felix}, title = {Data dependencies for query optimization}, series = {The VLDB journal : the international journal on very large data bases / publ. on behalf of the VLDB Endowment}, volume = {31}, journal = {The VLDB journal : the international journal on very large data bases / publ. on behalf of the VLDB Endowment}, number = {1}, publisher = {Springer}, address = {Berlin ; Heidelberg ; New York}, issn = {1066-8888}, doi = {10.1007/s00778-021-00676-3}, pages = {1 -- 22}, year = {2021}, abstract = {Effective query optimization is a core feature of any database management system. While most query optimization techniques make use of simple metadata, such as cardinalities and other basic statistics, other optimization techniques are based on more advanced metadata including data dependencies, such as functional, uniqueness, order, or inclusion dependencies. This survey provides an overview, intuitive descriptions, and classifications of query optimization and execution strategies that are enabled by data dependencies. We consider the most popular types of data dependencies and focus on optimization strategies that target the optimization of relational database queries. The survey supports database vendors in identifying optimization opportunities as well as DBMS researchers in finding related work and open research questions.}, language = {en} } @book{KrauseGiese2012, author = {Krause, Christian and Giese, Holger}, title = {Quantitative modeling and analysis of service-oriented real-time systems using interval probabilistic timed automata}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-171-4}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-57845}, publisher = {Universit{\"a}t Potsdam}, pages = {45}, year = {2012}, abstract = {One of the key challenges in service-oriented systems engineering is the prediction and assurance of non-functional properties, such as the reliability and the availability of composite interorganizational services. Such systems are often characterized by a variety of inherent uncertainties, which must be addressed in the modeling and the analysis approach. The different relevant types of uncertainties can be categorized into (1) epistemic uncertainties due to incomplete knowledge and (2) randomization as explicitly used in protocols or as a result of physical processes. In this report, we study a probabilistic timed model which allows us to quantitatively reason about nonfunctional properties for a restricted class of service-oriented real-time systems using formal methods. 
To properly motivate the choice for the used approach, we devise a requirements catalogue for the modeling and the analysis of probabilistic real-time systems with uncertainties and provide evidence that the uncertainties of type (1) and (2) in the targeted systems have a major impact on the used models and require distinguished analysis approaches. The formal model we use in this report is Interval Probabilistic Timed Automata (IPTA). Based on the outlined requirements, we give evidence that this model provides both enough expressiveness for a realistic and modular specification of the targeted class of systems, and suitable formal methods for analyzing properties, such as safety and reliability properties, in a quantitative manner. As technical means for the quantitative analysis, we build on probabilistic model checking, specifically on probabilistic time-bounded reachability analysis and computation of expected reachability rewards and costs. To carry out the quantitative analysis using probabilistic model checking, we developed an extension of the Prism tool for modeling and analyzing IPTA. Our extension of Prism introduces a means for modeling probabilistic uncertainty in the form of probability intervals, as required for IPTA. For analyzing IPTA, our Prism extension moreover adds support for probabilistic reachability checking and computation of expected rewards and costs. We discuss the performance of our extended version of Prism and compare the interval-based IPTA approach to models with fixed probabilities.}, language = {en} } @phdthesis{Kunze2013, author = {Kunze, Matthias}, title = {Searching business process models by example}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-68844}, school = {Universit{\"a}t Potsdam}, year = {2013}, abstract = {Business processes are fundamental to the operations of a company. Each product manufactured and every service provided is the result of a series of actions that constitute a business process. Business process management is an organizational principle that makes the processes of a company explicit and offers capabilities to implement procedures, control their execution, analyze their performance, and improve them. Therefore, business processes are documented as process models that capture these actions and their execution ordering, and make them accessible to stakeholders. As these models are an essential knowledge asset, they need to be managed effectively. In particular, the discovery and reuse of existing knowledge becomes challenging in the light of companies maintaining hundreds and thousands of process models. In practice, searching process models has been solved only superficially by means of free-text search of process names and their descriptions. Scientific contributions are limited in their scope, as they either present measures for process similarity or elaborate on query languages to search for particular aspects. However, they fall short in addressing efficient search, the presentation of search results, and the support for reusing discovered models. This thesis presents a novel search method, where a query is expressed by an exemplary business process model that describes the behavior of a possible answer. This method builds upon a formal framework that captures and compares the behavior of process models by the execution ordering of actions. The framework contributes a conceptual notion of behavioral distance that quantifies commonalities and differences of a pair of process models, and enables process model search. 
Based on behavioral distances, a set of measures is proposed that evaluate the quality of a particular search result to guide the user in assessing the returned matches. A projection of behavioral aspects to a process model enables highlighting relevant fragments that led to a match and facilitates its reuse. The thesis further elaborates on two search techniques that provide concrete behavioral distance functions as an instantiation of the formal framework. Querying enables search with a notion of behavioral inclusion with regard to the query. In contrast, similarity search obtains process models that are similar to a query, even if the query is not precisely matched. For both techniques, indexes are presented that enable efficient search. Methods to evaluate the quality and performance of process model search are introduced and applied to the techniques of this thesis. They show good results with regard to human assessment and scalability in a practical setting.}, language = {en} } @book{KunzeWeske2016, author = {Kunze, Matthias and Weske, Mathias}, title = {Behavioural Models}, publisher = {Springer}, address = {Cham}, isbn = {978-3-319-44958-6}, publisher = {Universit{\"a}t Potsdam}, pages = {279}, year = {2016}, abstract = {This textbook introduces the basis for modelling and analysing discrete dynamic systems, such as computer programmes, soft- and hardware systems, and business processes. The underlying concepts are introduced and concrete modelling techniques are described, such as finite automata, state machines, and Petri nets. The concepts are related to concrete application scenarios, among which business processes play a prominent role. The book consists of three parts, the first of which addresses the foundations of behavioural modelling. After a general introduction to modelling, it introduces transition systems as a basic formalism for representing the behaviour of discrete dynamic systems. This section also discusses causality, a fundamental concept for modelling and reasoning about behaviour. In turn, Part II forms the heart of the book and is devoted to models of behaviour. It details both sequential and concurrent systems and introduces finite automata, state machines and several different types of Petri nets. One chapter is especially devoted to business process models, workflow patterns and BPMN, the industry standard for modelling business processes. Lastly, Part III investigates how the behaviour of systems can be analysed. To this end, it introduces readers to the concept of state spaces. Further chapters cover the comparison of behaviour and the formal analysis and verification of behavioural models. The book was written for students of computer science and software engineering, as well as for programmers and system analysts interested in the behaviour of the systems they work on. 
It takes readers on a journey from the fundamentals of behavioural modelling to advanced techniques for modelling and analysing sequential and concurrent systems, and thus provides them with a deep understanding of the concepts and techniques introduced and how they can be applied to concrete application scenarios.}, language = {en} } @inproceedings{KurbelNowakAzodietal.2015, author = {Kurbel, Karl and Nowak, Dawid and Azodi, Amir and Jaeger, David and Meinel, Christoph and Cheng, Feng and Sapegin, Andrey and Gawron, Marian and Morelli, Frank and Stahl, Lukas and Kerl, Stefan and Janz, Mariska and Hadaya, Abdulmasih and Ivanov, Ivaylo and Wiese, Lena and Neves, Mariana and Schapranow, Matthieu-Patrick and F{\"a}hnrich, Cindy and Feinbube, Frank and Eberhardt, Felix and Hagen, Wieland and Plauth, Max and Herscheid, Lena and Polze, Andreas and Barkowsky, Matthias and Dinger, Henriette and Faber, Lukas and Montenegro, Felix and Czach{\´o}rski, Tadeusz and Nycz, Monika and Nycz, Tomasz and Baader, Galina and Besner, Veronika and Hecht, Sonja and Schermann, Michael and Krcmar, Helmut and Wiradarma, Timur Pratama and Hentschel, Christian and Sack, Harald and Abramowicz, Witold and Sokolowska, Wioletta and Hossa, Tymoteusz and Opalka, Jakub and Fabisz, Karol and Kubaczyk, Mateusz and Cmil, Milena and Meng, Tianhui and Dadashnia, Sharam and Niesen, Tim and Fettke, Peter and Loos, Peter and Perscheid, Cindy and Schwarz, Christian and Schmidt, Christopher and Scholz, Matthias and Bock, Nikolai and Piller, Gunther and B{\"o}hm, Klaus and Norkus, Oliver and Clark, Brian and Friedrich, Bj{\"o}rn and Izadpanah, Babak and Merkel, Florian and Schweer, Ilias and Zimak, Alexander and Sauer, J{\"u}rgen and Fabian, Benjamin and Tilch, Georg and M{\"u}ller, David and Pl{\"o}ger, Sabrina and Friedrich, Christoph M. and Engels, Christoph and Amirkhanyan, Aragats and van der Walt, Est{\´e}e and Eloff, J. H. P. and Scheuermann, Bernd and Weinknecht, Elisa}, title = {HPI Future SOC Lab}, editor = {Meinel, Christoph and Polze, Andreas and Oswald, Gerhard and Strotmann, Rolf and Seibold, Ulrich and Schulzki, Bernhard}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-102516}, pages = {iii, 154}, year = {2015}, abstract = {The Future SOC Lab at HPI is a cooperation between the Hasso Plattner Institute and various industry partners. Its mission is to enable and promote exchange between the research community and industry. At the lab, interested researchers are provided with an infrastructure of the latest hardware and software, free of charge, for research purposes. This includes technologies that are in part not yet available on the market and that would typically not be affordable in a regular university setting, e.g., servers with up to 64 cores and 2 TB of main memory. These offerings are aimed in particular at researchers in the fields of computer science and business information systems. Some of the focus areas are cloud computing, parallelization, and in-memory technologies. This technical report presents the results of the research projects carried out in 2015. Selected projects presented their results on April 15, 2015 and on
November 4, 2015 at the Future SOC Lab Day events.}, language = {en} } @book{KuropkaMeyer2005, author = {Kuropka, Dominik and Meyer, Harald}, title = {Survey on Service Composition}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {3-937786-78-3}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-33787}, publisher = {Universit{\"a}t Potsdam}, pages = {35}, year = {2005}, abstract = {It is predicted that Service-oriented Architectures (SOA) will have a high impact on future electronic business and markets. Services will provide a self-contained and standardised interface towards business and are considered as the future platform for business-to-business and business-to-consumer trades. Given the complexity of real-world business scenarios, a huge need for an easy, flexible, and automated creation and enactment of service compositions is observed. This survey explores the relationship of service composition with workflow management, a technology/concept already in use in many business environments. The similarities between the two and the key differences between them are elaborated. Furthermore, methods for the composition of services, ranging from manual and semi-automated to fully automated composition, are sketched. This survey concludes that current tools for service composition are in an immature state and that there is still much research to do before service composition can be used easily and conveniently in real-world scenarios. However, since automated service composition is a key enabler for the full potential of Service-oriented Architectures, further research in this field is imperative. This survey closes with a formal sample scenario presented in appendix A to give the reader an impression of how fully automated service composition works.}, language = {en} } @phdthesis{Kyprianidis2013, author = {Kyprianidis, Jan Eric}, title = {Structure adaptive stylization of images and video}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-64104}, school = {Universit{\"a}t Potsdam}, year = {2013}, abstract = {In the early days of computer graphics, research was mainly driven by the goal of creating realistic synthetic imagery. By contrast, non-photorealistic computer graphics, established as its own branch of computer graphics in the early 1990s, is mainly motivated by concepts and principles found in traditional art forms, such as painting, illustration, and graphic design, and it investigates concepts and techniques that abstract from reality using expressive, stylized, or illustrative rendering techniques. This thesis focuses on the artistic stylization of two-dimensional content and presents several novel automatic techniques for the creation of simplified stylistic illustrations from color images, video, and 3D renderings. The primary innovation of these novel techniques is that they utilize the smooth structure tensor as a simple and efficient way to obtain information about the local structure of an image. More specifically, this thesis contributes to knowledge in this field in the following ways. First, a comprehensive review of the structure tensor is provided. In particular, different methods for integrating the minor eigenvector field of the smoothed structure tensor are developed, and the superiority of the smoothed structure tensor over the popular edge tangent flow is demonstrated. Second, separable implementations of the popular bilateral and difference of Gaussians filters that adapt to the local structure are presented. 
These filters avoid artifacts while being computationally highly efficient. Taken together, both provide an effective way to create a cartoon-style effect. Third, a generalization of the Kuwahara filter is presented that avoids artifacts by adapting the shape, scale, and orientation of the filter to the local structure. This causes directional image features to be better preserved and emphasized, resulting in overall sharper edges and a more feature-abiding painterly effect. In addition to the single-scale variant, a multi-scale variant is presented, which is capable of performing a highly aggressive abstraction. Fourth, a technique that builds upon the idea of combining flow-guided smoothing with shock filtering is presented, allowing for an aggressive exaggeration and an emphasis of directional image features. All presented techniques are suitable for temporally coherent per-frame filtering of video or dynamic 3D renderings, without requiring expensive extra processing, such as optical flow. Moreover, they can be efficiently implemented to process content in real-time on a GPU.}, language = {en} } @article{LambersOrejas2021, author = {Lambers, Leen and Orejas, Fernando}, title = {Transformation rules with nested application conditions}, series = {Theoretical computer science}, volume = {884}, journal = {Theoretical computer science}, publisher = {Elsevier}, address = {Amsterdam}, issn = {0304-3975}, doi = {10.1016/j.tcs.2021.07.023}, pages = {44 -- 67}, year = {2021}, abstract = {Recently, initial conflicts were introduced in the framework of M-adhesive categories as an important optimization of critical pairs. In particular, they represent a proper subset such that each conflict is represented in a minimal context by a unique initial one. The theory of critical pairs has been extended in the framework of M-adhesive categories to rules with nested application conditions (ACs), restricting the applicability of a rule and generalizing the well-known negative application conditions. A notion of initial conflicts for rules with ACs does not exist yet. In this paper, on the one hand, we extend the theory of initial conflicts in the framework of M-adhesive categories to transformation rules with ACs. They represent a proper subset again of critical pairs for rules with ACs, and represent each conflict in a minimal context uniquely. They are moreover symbolic because we can show that in general no finite and complete set of conflicts for rules with ACs exists. On the other hand, we show that critical pairs are minimally M-complete, whereas initial conflicts are minimally complete. Finally, we introduce important special cases of rules with ACs for which we can obtain finite, minimally (M-)complete sets of conflicts.}, language = {en} } @article{LambersWeber2020, author = {Lambers, Leen and Weber, Jens}, title = {Preface to the special issue on the 11th International Conference on Graph Transformation}, series = {Journal of Logical and Algebraic Methods in Programming}, volume = {112}, journal = {Journal of Logical and Algebraic Methods in Programming}, publisher = {Elsevier}, address = {Amsterdam}, issn = {2352-2208}, doi = {10.1016/j.jlamp.2020.100525}, pages = {2}, year = {2020}, abstract = {This special issue contains extended versions of four selected papers from the 11th International Conference on Graph Transformation (ICGT 2018). 
The articles cover a tool for computing core graphs via SAT/SMT solvers (graph language definition), graph transformation through graph surfing in reaction systems (a new graph transformation formalism), the essence and initiality of conflicts in M-adhesive transformation systems, and a calculus of concurrent graph-rewriting processes (theory on conflicts and parallel independence).}, language = {en} } @phdthesis{Lange2013, author = {Lange, Dustin}, title = {Effective and efficient similarity search in databases}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-65712}, school = {Universit{\"a}t Potsdam}, year = {2013}, abstract = {Given a large set of records in a database and a query record, similarity search aims to find all records sufficiently similar to the query record. To solve this problem, two main aspects need to be considered: First, to perform effective search, the set of relevant records is defined using a similarity measure. Second, an efficient access method must be found that performs only a few database accesses and comparisons using the similarity measure. This thesis solves both aspects with an emphasis on the latter. In the first part of this thesis, a frequency-aware similarity measure is introduced. Compared record pairs are partitioned according to frequencies of attribute values. For each partition, a different similarity measure is created: machine learning techniques combine a set of base similarity measures into an overall similarity measure. After that, a similarity index for string attributes is proposed, the State Set Index (SSI), which is based on a trie (prefix tree) that is interpreted as a nondeterministic finite automaton. For processing range queries, the notion of query plans is introduced in this thesis to describe which similarity indexes to access and which thresholds to apply. The query result should be as complete as possible under some cost threshold. Two query planning variants are introduced: (1) Static planning selects a plan at compile time that is used for all queries. (2) Query-specific planning selects a different plan for each query. For answering top-k queries, the Bulk Sorted Access Algorithm (BSA) is introduced, which retrieves large chunks of records from the similarity indexes using fixed thresholds, and which focuses its efforts on records that are ranked high in more than one attribute and are thus promising candidates. The described components form a complete similarity search system. Based on prototypical implementations, this thesis shows comparative evaluation results for all proposed approaches on different real-world data sets, one of which is a large person data set from a German credit rating agency.}, language = {en} } @book{LangeBoehmNaumann2010, author = {Lange, Dustin and B{\"o}hm, Christoph and Naumann, Felix}, title = {Extracting structured information from Wikipedia articles to populate infoboxes}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-081-6}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-45714}, publisher = {Universit{\"a}t Potsdam}, pages = {27}, year = {2010}, abstract = {Roughly every third Wikipedia article contains an infobox - a table that displays important facts about the subject in attribute-value form. The schema of an infobox, i.e., the attributes that can be expressed for a concept, is defined by an infobox template. Often, authors do not specify all template attributes, resulting in incomplete infoboxes.
With iPopulator, we introduce a system that automatically populates infoboxes of Wikipedia articles by extracting attribute values from the article's text. In contrast to prior work, iPopulator detects and exploits the structure of attribute values for independently extracting value parts. We have tested iPopulator on the entire set of infobox templates and provide a detailed analysis of its effectiveness. For instance, we achieve an average extraction precision of 91\% for 1,727 distinct infobox template attributes.}, language = {en} } @book{LinckelsMeinel2005, author = {Linckels, Serge and Meinel, Christoph}, title = {An e-librarian service : natural language interface for an efficient semantic search within multimedia resources}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-937786-89-6}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-33088}, publisher = {Universit{\"a}t Potsdam}, pages = {40}, year = {2005}, abstract = {1 Introduction 1.1 Project formulation 1.2 Our contribution 2 Pedagogical Aspect 2.1 Modern teaching 2.2 Our contribution 2.2.1 Autonomous and exploratory learning 2.2.2 Human machine interaction 2.2.3 Short multimedia clips 3 Ontology Aspect 3.1 Ontology driven expert systems 3.2 Our contribution 3.2.1 Ontology language 3.2.2 Concept Taxonomy 3.2.3 Knowledge base annotation 3.2.4 Description Logics 4 Natural language approach 4.1 Natural language processing in computer science 4.2 Our contribution 4.2.1 Explored strategies 4.2.2 Word equivalence 4.2.3 Semantic interpretation 4.2.4 Various problems 5 Information Retrieval Aspect 5.1 Modern information retrieval 5.2 Our contribution 5.2.1 Semantic query generation 5.2.2 Semantic relatedness 6 Implementation 6.1 Prototypes 6.2 Semantic layer architecture 6.3 Development 7 Experiments 7.1 Description of the experiments 7.2 General characteristics of the three sessions, instructions and procedure 7.3 First Session 7.4 Second Session 7.5 Third Session 7.6 Discussion and conclusion 8 Conclusion and future work 8.1 Conclusion 8.2 Open questions A Description Logics B Probabilistic context-free grammars}, language = {en} } @phdthesis{Lorey2014, author = {Lorey, Johannes}, title = {What's in a query : analyzing, predicting, and managing linked data access}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-72312}, school = {Universit{\"a}t Potsdam}, year = {2014}, abstract = {The term Linked Data refers to connected information sources comprising structured data about a wide range of topics and for a multitude of applications. In recent years, the conceptual and technical foundations of Linked Data have been formalized and refined. To this end, well-known technologies have been established, such as the Resource Description Framework (RDF) as a Linked Data model or the SPARQL Protocol and RDF Query Language (SPARQL) for retrieving this information. Whereas most research has been conducted in the area of generating and publishing Linked Data, this thesis presents novel approaches for improved management. In particular, we illustrate new methods for analyzing and processing SPARQL queries. Here, we present two algorithms suitable for identifying structural relationships between these queries. Both algorithms are applied to a large number of real-world requests to evaluate the performance of the approaches and the quality of their results. Based on this, we introduce different strategies enabling optimized access to Linked Data sources.
We demonstrate how the presented approach facilitates effective utilization of SPARQL endpoints by prefetching results relevant for multiple subsequent requests. Furthermore, we contribute a set of metrics for determining technical characteristics of such knowledge bases. To this end, we devise practical heuristics and validate them through thorough analysis of real-world data sources. We discuss the findings and evaluate their impact on utilizing the endpoints. Moreover, we detail the adoption of a scalable infrastructure for improving Linked Data discovery and consumption. As we outline in an exemplary use case, this platform is suitable both for processing and for provisioning the corresponding information.}, language = {en} } @article{LosterKoumarelasNaumann2021, author = {Loster, Michael and Koumarelas, Ioannis and Naumann, Felix}, title = {Knowledge transfer for entity resolution with siamese neural networks}, series = {ACM journal of data and information quality}, volume = {13}, journal = {ACM journal of data and information quality}, number = {1}, publisher = {Association for Computing Machinery}, address = {New York}, issn = {1936-1955}, doi = {10.1145/3410157}, pages = {25}, year = {2021}, abstract = {The integration of multiple data sources is a common problem in a large variety of applications. Traditionally, handcrafted similarity measures are used to discover, merge, and integrate multiple representations of the same entity, so-called duplicates, into a large homogeneous collection of data. Often, these similarity measures do not cope well with the heterogeneity of the underlying dataset. In addition, domain experts are needed to manually design and configure such measures, which is both time-consuming and requires extensive domain expertise.
We propose a deep Siamese neural network capable of learning a similarity measure that is tailored to the characteristics of a particular dataset. Using the properties of deep learning methods, we are able to eliminate the manual feature engineering process and thus considerably reduce the effort required for model construction. In addition, we show that it is possible to transfer knowledge acquired during the deduplication of one dataset to another, and thus significantly reduce the amount of data required to train a similarity measure. We evaluate our method on multiple datasets and compare our approach to state-of-the-art deduplication methods. Our approach outperforms competitors by up to +26 percent F-measure, depending on task and dataset. In addition, we show that knowledge transfer is not only feasible, but in our experiments led to an improvement in F-measure of up to +4.7 percent.}, language = {en} } @book{Luebbe2011, author = {L{\"u}bbe, Alexander}, title = {The effect of tangible media on individuals in business process modeling : a controlled experiment = Der Einfluss greifbarer Medien auf einzelne Personen bei der Gesch{\"a}ftsprozessmodellierung : ein kontrolliertes Experiment}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-108-0}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-49001}, publisher = {Universit{\"a}t Potsdam}, pages = {42}, year = {2011}, abstract = {In current practice, business process modeling is done by trained method experts. Domain experts are interviewed to elicit their process information but are not involved in modeling. We created a haptic toolkit for process modeling that can be used in process elicitation sessions with domain experts. We hypothesize that this leads to more effective process elicitation. This paper breaks down "effective elicitation" into 14 operationalized hypotheses. They are assessed in a controlled experiment using questionnaires, process model feedback tests, and video analysis. The experiment compares our approach to structured interviews in a repeated measurement design. We executed the experiment with 17 student clerks from a trade school. They represent potential users of the tool. Six out of fourteen hypotheses showed a significant difference due to the method applied. Subjects reported more fun and more insights into process modeling with tangible media. Video analysis showed significantly more reviews and corrections applied during process elicitation. Moreover, people take more time to talk and think about their processes. We conclude that tangible media creates a different working mode for people in process elicitation, with fun, new insights, and instant feedback on preliminary results.}, language = {en} } @article{MattisBeckmannReinetal.2022, author = {Mattis, Toni and Beckmann, Tom and Rein, Patrick and Hirschfeld, Robert}, title = {First-class concepts}, series = {Journal of object technology : JOT / ETH Z{\"u}rich, Department of Computer Science}, volume = {21}, journal = {Journal of object technology : JOT / ETH Z{\"u}rich, Department of Computer Science}, number = {2}, publisher = {ETH Z{\"u}rich, Department of Computer Science}, address = {Z{\"u}rich}, issn = {1660-1769}, doi = {10.5381/jot.2022.21.2.a6}, pages = {1 -- 15}, year = {2022}, abstract = {Ideally, programs are partitioned into independently maintainable and understandable modules. As a system grows, its architecture gradually loses the capability to accommodate new concepts in a modular way.
While refactoring is expensive and not always possible, and the programming language might lack dedicated primary language constructs to express certain cross-cutting concerns, programmers are still able to explain and delineate convoluted concepts through secondary means: code comments, use of whitespace and arrangement of code, documentation, or communicating tacit knowledge.
Secondary constructs are easy to change and provide high flexibility in communicating cross-cutting concerns and other concepts among programmers. However, such secondary constructs usually have no reified representation that can be explored and manipulated as first-class entities through the programming environment.
In this exploratory work, we discuss novel ways to express a wide range of concepts, including cross-cutting concerns, patterns, and lifecycle artifacts, independently of the dominant decomposition imposed by an existing architecture. We propose the representation of concepts as first-class objects inside the programming environment that retain the capability to change as easily as code comments. We explore new tools that allow programmers to view, navigate, and change programs based on conceptual perspectives. In a small case study, we demonstrate how such views can be created and how the programming experience changes from draining programmers' attention by stretching it across multiple modules toward focusing it on cohesively presented concepts. Our designs are geared toward facilitating multiple secondary perspectives on a system to co-exist in symbiosis with the original architecture, hence making it easier to explore, understand, and explain complex contexts and narratives that are hard or impossible to express using primary modularity constructs.}, language = {en} } @book{MaximovaGieseKrause2017, author = {Maximova, Maria and Giese, Holger and Krause, Christian}, title = {Probabilistic timed graph transformation systems}, number = {118}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-405-0}, issn = {1613-5652}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-397055}, publisher = {Universit{\"a}t Potsdam}, pages = {34}, year = {2017}, abstract = {Today, software has become an intrinsic part of complex distributed embedded real-time systems. The next generation of embedded real-time systems will interconnect today's unconnected systems via complex software parts and the service-oriented paradigm. Therefore, besides timed and probabilistic behavior, structure dynamics is also required, where the architecture can be subject to changes at run-time, e.g., when dynamic binding of service end-points is employed or complex collaborations are established dynamically. However, a modeling and analysis approach that combines all these necessary aspects does not yet exist. To fill the identified gap, we propose Probabilistic Timed Graph Transformation Systems (PTGTSs) as a high-level description language that supports all the necessary aspects of structure dynamics, timed behavior, and probabilistic behavior. We introduce the formal model of PTGTSs in this paper and present a mapping of models with finite state spaces to probabilistic timed automata (PTA) that allows us to use the PRISM model checker to analyze PTGTS models with respect to PTCTL properties.}, language = {en} } @book{MeinelPlattnerDoellneretal.2014, author = {Meinel, Christoph and Plattner, Hasso and D{\"o}llner, J{\"u}rgen Roland Friedrich and Weske, Mathias and Polze, Andreas and Hirschfeld, Robert and Naumann, Felix and Giese, Holger and Baudisch, Patrick}, title = {Proceedings of the 7th Ph.D. Retreat of the HPI Research School on Service-oriented Systems Engineering}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-273-5}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-63490}, publisher = {Universit{\"a}t Potsdam}, pages = {ii, 218}, year = {2014}, abstract = {The design and implementation of service-oriented architectures poses a huge number of research questions from the fields of software engineering, system analysis and modeling, adaptability, and application integration.
Component orientation and web services are two approaches for the design and realization of complex web-based systems. Both approaches allow for dynamic application adaptation as well as integration of enterprise applications. Commonly used technologies, such as J2EE and .NET, form de facto standards for the realization of complex distributed systems. Evolution of component systems has led to web services and service-based architectures. This has been manifested in a multitude of industry standards and initiatives such as XML, WSDL, UDDI, SOAP, etc. All these achievements lead to a new and promising paradigm in IT systems engineering which proposes to design complex software solutions as the collaboration of contractually defined software services. Service-Oriented Systems Engineering represents a symbiosis of best practices in object-orientation, component-based development, distributed computing, and business process management. It provides integration of business and IT concerns. The annual Ph.D. Retreat of the Research School provides each member the opportunity to present the current state of his or her research and to give an outline of a prospective Ph.D. thesis. Due to the interdisciplinary structure of the Research School, this technical report covers a wide range of research topics. These include but are not limited to: Self-Adaptive Service-Oriented Systems, Operating System Support for Service-Oriented Systems, Architecture and Modeling of Service-Oriented Systems, Adaptive Process Management, Services Composition and Workflow Planning, Security Engineering of Service-Based IT Systems, Quantitative Analysis and Optimization of Service-Oriented Systems, Service-Oriented Systems in 3D Computer Graphics, and Service-Oriented Geoinformatics.}, language = {en} } @book{MeinelSackBross2008, author = {Meinel, Christoph and Sack, Harald and Bross, Justus}, title = {Erster Deutscher IPv6 Gipfel}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-32865}, publisher = {Universit{\"a}t Potsdam}, year = {2008}, abstract = {Contents: COMMUNIQU{\´E}, WELCOME ADDRESS, PROGRAM, BACKGROUND AND FACTS, SPEAKERS: BIOGRAPHY \& TALK SUMMARIES. 1.) DER ERSTE DEUTSCHE IPV6 GIPFEL AM HASSO PLATTNER INSTITUT IN POTSDAM - PROF. DR. CHRISTOPH MEINEL - VIVIANE REDING 2.) IPV6, ITS TIME HAS COME - VINTON CERF 3.) DIE BEDEUTUNG VON IPV6 F{\"U}R DIE {\"O}FFENTLICHE VERWALTUNG IN DEUTSCHLAND - MARTIN SCHALLBRUCH 4.) TOWARDS THE FUTURE OF THE INTERNET - PROF. DR. LUTZ HEUSER 5.) IPV6 STRATEGY \& DEPLOYMENT STATUS IN JAPAN - HIROSHI MIYATA 6.) IPV6 STRATEGY \& DEPLOYMENT STATUS IN CHINA - PROF. WU HEQUAN 7.) IPV6 STRATEGY AND DEPLOYMENT STATUS IN KOREA - DR. EUNSOOK KIM 8.) IPV6 DEPLOYMENT EXPERIENCES IN GREEK SCHOOL NETWORK - ATHANASSIOS LIAKOPOULOS 9.) IPV6 NETWORK MOBILITY AND ITS USAGE - JEAN-MARIE BONNIN 10.) IPV6 - R{\"U}STZEUG F{\"U}R OPERATOR \& ISP IPV6 DEPLOYMENT UND STRATEGIEN DER DEUTSCHEN TELEKOM - HENNING GROTE 11.) VIEW FROM THE IPV6 DEPLOYMENT FRONTLINE - YVES POPPE 12.) DEPLOYING IPV6 IN MOBILE ENVIRONMENTS - WOLFGANG FRITSCHE 13.) PRODUCTION READY IPV6 FROM CUSTOMER LAN TO THE INTERNET - LUTZ DONNERHACKE 14.) IPV6 - DIE BASIS F{\"U}R NETZWERKZENTRIERTE OPERATIONSF{\"U}HRUNG (NETOPF{\"U}) IN DER BUNDESWEHR HERAUSFORDERUNGEN - ANWENDUNGSFALLBETRACHTUNGEN - AKTIVIT{\"A}TEN - CARSTEN HATZIG 15.) WINDOWS VISTA \& IPV6 - BERND OURGHANLIAN 16.) IPV6 \& HOME NETWORKING TECHNICAL AND BUSINESS CHALLENGES - DR. TAYEB BEN MERIEM 17.) DNS AND DHCP FOR DUAL STACK NETWORKS - LAWRENCE HUGHES 18.)
CAR INDUSTRY: GERMAN EXPERIENCE WITH IPV6 - AMARDEO SARMA 19.) IPV6 \& AUTONOMIC NETWORKING - RANGANAI CHAPARADZA 20.) P2P \& GRID USING IPV6 AND MOBILE IPV6 - DR. LATIF LADID}, language = {en} } @book{MeinelWillems2013, author = {Meinel, Christoph and Willems, Christian}, title = {openHPI : the MOOC offer at Hasso Plattner Institute}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-264-3}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-67176}, publisher = {Universit{\"a}t Potsdam}, pages = {21}, year = {2013}, abstract = {The new interactive online educational platform openHPI (https://openHPI.de) from Hasso Plattner Institute (HPI) offers freely accessible courses at no charge for all who are interested in subjects in the field of information technology and computer science. Since 2011, "Massive Open Online Courses," called MOOCs for short, have been offered, first at Stanford University and later at other elite U.S. universities. Following suit, openHPI provides instructional videos on the Internet and further reading material, combined with learning-supportive self-tests, homework, and a social discussion forum. Education is further stimulated by the support of a virtual learning community. In contrast to "traditional" lecture platforms, such as the tele-TASK portal (http://www.tele-task.de) where multimedia recorded lectures are available on demand, openHPI offers didactic online courses. The courses have a fixed start date and offer a balanced schedule of six consecutive weeks presented in multimedia and, whenever possible, interactive learning material. Each week, one chapter of the course subject is treated. In addition, a series of learning videos, texts, self-tests and homework exercises is provided to course participants at the beginning of the week. The course offering is combined with a social discussion platform where participants have the opportunity to enter into an exchange with course instructors and fellow participants. Here, for example, they can get answers to questions and discuss the topics in depth. The participants naturally decide themselves about the type and range of their learning activities. They can make personal contributions to the course, for example, in blog posts or tweets, which they can refer to in the forum. In turn, other participants have the chance to comment on, discuss, or expand on what has been said. In this way, the learners become the teachers and the subject matter offered to a virtual community is linked to a social learning network.}, language = {en} } @phdthesis{Menzel2011, author = {Menzel, Michael}, title = {Model-driven security in service-oriented architectures : leveraging security patterns to transform high-level security requirements to technical policies}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-59058}, school = {Universit{\"a}t Potsdam}, year = {2011}, abstract = {Service-oriented Architectures (SOA) facilitate the provision and orchestration of business services to enable faster adaptation to changing business demands. Web Services provide a technical foundation to implement this paradigm on the basis of XML-messaging. However, the enhanced flexibility of message-based systems comes along with new threats and risks. To address these issues, a variety of security mechanisms and approaches is supported by the Web Service specifications. The usage of these security mechanisms and protocols is configured by stating security requirements in security policies.
However, security policy languages for SOA are complex and difficult to create due to the expressiveness of these languages. To facilitate and simplify the creation of security policies, this thesis presents a model-driven approach that enables the generation of complex security policies on the basis of simple security intentions. SOA architects can specify these intentions in system design models and are not required to deal with complex technical security concepts. The approach introduced in this thesis enables the enhancement of any system design modelling language - for example, FMC or BPMN - with security modelling elements. The syntax, semantics, and notation of these elements are defined by our security modelling language SecureSOA. The metamodel of this language provides extension points to enable the integration into system design modelling languages. In particular, this thesis demonstrates the enhancement of FMC block diagrams with SecureSOA. To enable the model-driven generation of security policies, a domain-independent policy model is introduced in this thesis. This model provides an abstraction layer for security policies. Mappings are used to perform the transformation from our model to security policy languages. However, expert knowledge is required to generate instances of this model on the basis of simple security intentions. Appropriate security mechanisms, protocols, and options must be chosen and combined to fulfil these security intentions. In this thesis, a formalised system of security patterns is used to represent this knowledge and to enable an automated transformation process. Moreover, a domain-specific language is introduced to state security patterns in an accessible way. On the basis of this language, a system of security configuration patterns is provided to transform security intentions related to data protection and identity management. The formal semantics of the security pattern language enable the verification of the transformation process introduced in this thesis and prove the correctness of the pattern application. Finally, our SOA Security LAB is presented that demonstrates the application of our model-driven approach to facilitate a dynamic creation, configuration, and execution of secure Web Service-based composed applications.}, language = {en} } @phdthesis{Meyer2015, author = {Meyer, Andreas}, title = {Data perspective in business process management}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-84806}, school = {Universit{\"a}t Potsdam}, pages = {xxi, 362}, year = {2015}, abstract = {Business process management is a structured approach to modeling, analyzing, controlling, and executing business processes in order to achieve business goals. It relies on conceptual models, of which process models are the most widely used. Process models describe who has to perform which tasks to reach a business goal and which information is required to do so. Thus, process models contain information about the control flow, the assignment of responsibilities, the data flow, and information systems. Automating business processes increases the efficiency of work and is supported by process engines. This, however, requires information about the control flow, the assignment of responsibilities for tasks, and the data flow.
W{\"a}hrend aktuelle Process Engines die ersten beiden Informationen weitgehend automatisiert verarbeiten k{\"o}nnen, m{\"u}ssen Daten manuell implementiert und gewartet werden. Dem entgegen verspricht ein modell-getriebenes Behandeln von Daten eine vereinfachte Implementation in der Process Engine und verringert gleichzeitig die Fehleranf{\"a}lligkeit dank einer graphischen Visualisierung und reduziert den Entwicklungsaufwand durch Codegenerierung. Die vorliegende Dissertation besch{\"a}ftigt sich mit der Modellierung, der Analyse und der Ausf{\"u}hrung von Daten in Gesch{\"a}ftsprozessen. Als formale Basis f{\"u}r die Prozessausf{\"u}hrung wird ein konzeptuelles Framework f{\"u}r die Integration von Prozessen und Daten eingef{\"u}hrt. Dieses Framework wird durch operationelle Semantik erg{\"a}nzt, die mittels einem um Daten erweiterten Petrinetz-Mapping vorgestellt wird. Die modellgetriebene Ausf{\"u}hrung von Daten muss komplexe Datenabh{\"a}ngigkeiten, Prozessdaten und den Datenaustausch ber{\"u}cksichtigen. Letzterer tritt bei der Kommunikation zwischen mehreren Prozessteilnehmern auf. Diese Arbeit nutzt Konzepte aus dem Bereich der Datenbanken und {\"u}berf{\"u}hrt diese ins Gesch{\"a}ftsprozessmanagement, um Datenoperationen zu unterscheiden, um Abh{\"a}ngigkeiten zwischen Datenobjekten des gleichen und verschiedenen Typs zu spezifizieren, um modellierte Datenknoten sowie empfangene Nachrichten zur richtigen laufenden Prozessinstanz zu korrelieren und um Nachrichten f{\"u}r die Prozess{\"u}bergreifende Kommunikation zu generieren. Der entsprechende Ansatz ist nicht auf eine bestimmte Prozessbeschreibungssprache begrenzt und wurde prototypisch implementiert. Die Automatisierung der Datenbehandlung in Gesch{\"a}ftsprozessen erfordert entsprechend annotierte und korrekte Prozessmodelle. Als Unterst{\"u}tzung zur Datenannotierung f{\"u}hrt diese Arbeit einen Algorithmus ein, welcher Informationen {\"u}ber Datenknoten, deren Zust{\"a}nde und Datenabh{\"a}ngigkeiten aus Kontrollflussinformationen extrahiert und die Prozessmodelle entsprechend annotiert. Allerdings k{\"o}nnen gew{\"o}hnlich nicht alle erforderlichen Informationen aus Kontrollflussinformationen extrahiert werden, da detaillierte Angaben {\"u}ber m{\"o}gliche Datenmanipulationen fehlen. Deshalb sind weitere Prozessmodellverfeinerungen notwendig. Basierend auf einer Menge von Objektlebenszyklen kann ein Prozessmodell derart verfeinert werden, dass die in den Objektlebenszyklen spezifizierten Datenmanipulationen automatisiert in ein Prozessmodell {\"u}berf{\"u}hrt werden k{\"o}nnen. Prozessmodelle stellen eine Abstraktion dar. Somit fokussieren sie auf verschiedene Teilbereiche und stellen diese im Detail dar. Solche Detailbereiche sind beispielsweise die Kontrollflusssicht und die Datenflusssicht, welche oft durch Aktivit{\"a}ts-zentrierte beziehungsweise Objekt-zentrierte Prozessmodelle abgebildet werden. In der vorliegenden Arbeit werden Algorithmen zur Transformation zwischen diesen Sichten beschrieben. Zur Sicherstellung der Modellkorrektheit wird das Konzept der „weak conformance" zur {\"U}berpr{\"u}fung der Konsistenz zwischen Objektlebenszyklen und dem Prozessmodell eingef{\"u}hrt. Dabei darf das Prozessmodell nur Datenmanipulationen enthalten, die auch in einem Objektlebenszyklus spezifiziert sind. Die Korrektheit wird mittels Soundness-{\"U}berpr{\"u}fung einer hybriden Darstellung ermittelt, so dass Kontrollfluss- und Datenkorrektheit integriert {\"u}berpr{\"u}ft werden. 
Um eine korrekte Ausf{\"u}hrung des Prozessmodells zu gew{\"a}hrleisten, m{\"u}ssen gefundene Inkonsistenzen korrigiert werden. Daf{\"u}r werden f{\"u}r jede Inkonsistenz alternative Vorschl{\"a}ge zur Modelladaption identifiziert und vorgeschlagen. Zusammengefasst, unter Einsatz der Ergebnisse dieser Dissertation k{\"o}nnen Gesch{\"a}ftsprozesse modellgetrieben ausgef{\"u}hrt werden unter Ber{\"u}cksichtigung sowohl von Daten als auch den zuvor bereits unterst{\"u}tzten Perspektiven bez{\"u}glich Kontrollfluss und Verantwortlichkeiten. Dabei wird die Modellerstellung teilweise mit automatisierten Algorithmen unterst{\"u}tzt und die Modellkonsistenz durch Datenkorrektheits{\"u}berpr{\"u}fungen gew{\"a}hrleistet.}, language = {en} }