@book{AbedjanGolabNaumannetal., author = {Abedjan, Ziawasch and Golab, Lukasz and Naumann, Felix and Papenbrock, Thorsten}, title = {Data Profiling}, series = {Synthesis lectures on data management, 52}, journal = {Synthesis lectures on data management, 52}, publisher = {Morgan \& Claypool Publishers}, address = {San Rafael}, isbn = {978-1-68173-446-0}, pages = {xviii, 136}, language = {en} } @book{AbedjanNaumann2011, author = {Abedjan, Ziawasch and Naumann, Felix}, title = {Advancing the discovery of unique column combinations}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-148-6}, issn = {1613-5652}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-53564}, publisher = {Universit{\"a}t Potsdam}, pages = {25}, year = {2011}, abstract = {Unique column combinations of a relational database table are sets of columns that contain only unique values. Discovering such combinations is a fundamental research problem and has many different data management and knowledge discovery applications. Existing discovery algorithms are either brute force or have a high memory load and can thus be applied only to small datasets or samples. In this paper, the wellknown GORDIAN algorithm and "Apriori-based" algorithms are compared and analyzed for further optimization. We greatly improve the Apriori algorithms through efficient candidate generation and statistics-based pruning methods. A hybrid solution HCAGORDIAN combines the advantages of GORDIAN and our new algorithm HCA, and it significantly outperforms all previous work in many situations.}, language = {en} } @book{AlbrechtNaumann2012, author = {Albrecht, Alexander and Naumann, Felix}, title = {Understanding cryptic schemata in large extract-transform-load systems}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-201-8}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-61257}, publisher = {Universit{\"a}t Potsdam}, pages = {19}, year = {2012}, abstract = {Extract-Transform-Load (ETL) tools are used for the creation, maintenance, and evolution of data warehouses, data marts, and operational data stores. ETL workflows populate those systems with data from various data sources by specifying and executing a DAG of transformations. Over time, hundreds of individual workflows evolve as new sources and new requirements are integrated into the system. The maintenance and evolution of large-scale ETL systems requires much time and manual effort. A key problem is to understand the meaning of unfamiliar attribute labels in source and target databases and ETL transformations. Hard-to-understand attribute labels lead to frustration and time spent to develop and understand ETL workflows. We present a schema decryption technique to support ETL developers in understanding cryptic schemata of sources, targets, and ETL transformations. For a given ETL system, our recommender-like approach leverages the large number of mapped attribute labels in existing ETL workflows to produce good and meaningful decryptions. In this way we are able to decrypt attribute labels consisting of a number of unfamiliar few-letter abbreviations, such as UNP_PEN_INT, which we can decrypt to UNPAID_PENALTY_INTEREST. We evaluate our schema decryption approach on three real-world repositories of ETL workflows and show that our approach is able to suggest high-quality decryptions for cryptic attribute labels in a given schema.}, language = {en} } @book{AlnemrPolyvyanyyAbuJarouretal.2010, author = {Alnemr, Rehab and Polyvyanyy, Artem and AbuJarour, Mohammed and Appeltauer, Malte and Hildebrandt, Dieter and Thomas, Ivonne and Overdick, Hagen and Sch{\"o}bel, Michael and Uflacker, Matthias and Kluth, Stephan and Menzel, Michael and Schmidt, Alexander and Hagedorn, Benjamin and Pascalau, Emilian and Perscheid, Michael and Vogel, Thomas and Hentschel, Uwe and Feinbube, Frank and Kowark, Thomas and Tr{\"u}mper, Jonas and Vogel, Tobias and Becker, Basil}, title = {Proceedings of the 4th Ph.D. Retreat of the HPI Research School on Service-oriented Systems Engineering}, editor = {Meinel, Christoph and Plattner, Hasso and D{\"o}llner, J{\"u}rgen Roland Friedrich and Weske, Mathias and Polze, Andreas and Hirschfeld, Robert and Naumann, Felix and Giese, Holger}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-036-6}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-40838}, publisher = {Universit{\"a}t Potsdam}, pages = {Getr. Z{\"a}hlung}, year = {2010}, language = {en} } @book{AppeltauerHirschfeld2012, author = {Appeltauer, Malte and Hirschfeld, Robert}, title = {The JCop language specification : Version 1.0, April 2012}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-193-6}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-60208}, publisher = {Universit{\"a}t Potsdam}, pages = {iv, 48}, year = {2012}, abstract = {Program behavior that relies on contextual information, such as physical location or network accessibility, is common in today's applications, yet its representation is not sufficiently supported by programming languages. With context-oriented programming (COP), such context-dependent behavioral variations can be explicitly modularized and dynamically activated. In general, COP could be used to manage any context-specific behavior. However, its contemporary realizations limit the control of dynamic adaptation. This, in turn, limits the interaction of COP's adaptation mechanisms with widely used architectures, such as event-based, mobile, and distributed programming. The JCop programming language extends Java with language constructs for context-oriented programming and additionally provides a domain-specific aspect language for declarative control over runtime adaptations. As a result, these redesigned implementations are more concise and better modularized than their counterparts using plain COP. JCop's main features have been described in our previous publications. However, a complete language specification has not been presented so far. This report presents the entire JCop language including the syntax and semantics of its new language constructs.}, language = {en} } @book{BauckmannAbedjanLeseretal.2012, author = {Bauckmann, Jana and Abedjan, Ziawasch and Leser, Ulf and M{\"u}ller, Heiko and Naumann, Felix}, title = {Covering or complete? : Discovering conditional inclusion dependencies}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-212-4}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-62089}, publisher = {Universit{\"a}t Potsdam}, pages = {34}, year = {2012}, abstract = {Data dependencies, or integrity constraints, are used to improve the quality of a database schema, to optimize queries, and to ensure consistency in a database. In the last years conditional dependencies have been introduced to analyze and improve data quality. In short, a conditional dependency is a dependency with a limited scope defined by conditions over one or more attributes. Only the matching part of the instance must adhere to the dependency. In this paper we focus on conditional inclusion dependencies (CINDs). We generalize the definition of CINDs, distinguishing covering and completeness conditions. We present a new use case for such CINDs showing their value for solving complex data quality tasks. Further, we define quality measures for conditions inspired by precision and recall. We propose efficient algorithms that identify covering and completeness conditions conforming to given quality thresholds. Our algorithms choose not only the condition values but also the condition attributes automatically. Finally, we show that our approach efficiently provides meaningful and helpful results for our use case.}, language = {en} } @book{BauckmannLeserNaumann2010, author = {Bauckmann, Jana and Leser, Ulf and Naumann, Felix}, title = {Efficient and exact computation of inclusion dependencies for data integration}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-048-9}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-41396}, publisher = {Universit{\"a}t Potsdam}, pages = {36}, year = {2010}, abstract = {Data obtained from foreign data sources often come with only superficial structural information, such as relation names and attribute names. Other types of metadata that are important for effective integration and meaningful querying of such data sets are missing. In particular, relationships among attributes, such as foreign keys, are crucial metadata for understanding the structure of an unknown database. The discovery of such relationships is difficult, because in principle for each pair of attributes in the database each pair of data values must be compared. A precondition for a foreign key is an inclusion dependency (IND) between the key and the foreign key attributes. We present with Spider an algorithm that efficiently finds all INDs in a given relational database. It leverages the sorting facilities of DBMS but performs the actual comparisons outside of the database to save computation. Spider analyzes very large databases up to an order of magnitude faster than previous approaches. We also evaluate in detail the effectiveness of several heuristics to reduce the number of necessary comparisons. Furthermore, we generalize Spider to find composite INDs covering multiple attributes, and partial INDs, which are true INDs for all but a certain number of values. This last type is particularly relevant when integrating dirty data as is often the case in the life sciences domain - our driving motivation.}, language = {en} } @book{BeckerGiese2012, author = {Becker, Basil and Giese, Holger}, title = {Cyber-physical systems with dynamic structure : towards modeling and verification of inductive invariants}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-217-9}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-62437}, publisher = {Universit{\"a}t Potsdam}, pages = {iv, 27}, year = {2012}, abstract = {Cyber-physical systems achieve sophisticated system behavior exploring the tight interconnection of physical coupling present in classical engineering systems and information technology based coupling. A particular challenging case are systems where these cyber-physical systems are formed ad hoc according to the specific local topology, the available networking capabilities, and the goals and constraints of the subsystems captured by the information processing part. In this paper we present a formalism that permits to model the sketched class of cyber-physical systems. The ad hoc formation of tightly coupled subsystems of arbitrary size are specified using a UML-based graph transformation system approach. Differential equations are employed to define the resulting tightly coupled behavior. Together, both form hybrid graph transformation systems where the graph transformation rules define the discrete steps where the topology or modes may change, while the differential equations capture the continuous behavior in between such discrete changes. In addition, we demonstrate that automated analysis techniques known for timed graph transformation systems for inductive invariants can be extended to also cover the hybrid case for an expressive case of hybrid models where the formed tightly coupled subsystems are restricted to smaller local networks.}, language = {en} } @book{BeckerGieseNeumann2009, author = {Becker, Basil and Giese, Holger and Neumann, Stefan}, title = {Correct dynamic service-oriented architectures : modeling and compositional verification with dynamic collaborations}, organization = {System Analysis and Modeling Group}, isbn = {978-3-940793-91-1}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-30473}, publisher = {Universit{\"a}t Potsdam}, year = {2009}, abstract = {Service-oriented modeling employs collaborations to capture the coordination of multiple roles in form of service contracts. In case of dynamic collaborations the roles may join and leave the collaboration at runtime and therefore complex structural dynamics can result, which makes it very hard to ensure their correct and safe operation. We present in this paper our approach for modeling and verifying such dynamic collaborations. Modeling is supported using a well-defined subset of UML class diagrams, behavioral rules for the structural dynamics, and UML state machines for the role behavior. To be also able to verify the resulting service-oriented systems, we extended our former results for the automated verification of systems with structural dynamics [7, 8] and developed a compositional reasoning scheme, which enables the reuse of verification results. We outline our approach using the example of autonomous vehicles that use such dynamic collaborations via ad-hoc networking to coordinate and optimize their joint behavior.}, language = {en} } @book{BeyhlBlouinGieseetal.2016, author = {Beyhl, Thomas and Blouin, Dominique and Giese, Holger and Lambers, Leen}, title = {On the operationalization of graph queries with generalized discrimination networks}, number = {106}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-372-5}, issn = {1613-5652}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-96279}, publisher = {Universit{\"a}t Potsdam}, pages = {33}, year = {2016}, abstract = {Graph queries have lately gained increased interest due to application areas such as social networks, biological networks, or model queries. For the relational database case the relational algebra and generalized discrimination networks have been studied to find appropriate decompositions into subqueries and ordering of these subqueries for query evaluation or incremental updates of query results. For graph database queries however there is no formal underpinning yet that allows us to find such suitable operationalizations. Consequently, we suggest a simple operational concept for the decomposition of arbitrary complex queries into simpler subqueries and the ordering of these subqueries in form of generalized discrimination networks for graph queries inspired by the relational case. The approach employs graph transformation rules for the nodes of the network and thus we can employ the underlying theory. We further show that the proposed generalized discrimination networks have the same expressive power as nested graph conditions.}, language = {en} }