% Bibliography records, one field per line.
% Conventions used below: non-ASCII letters are written as LaTeX escapes in
% BibTeX "special character" form, page and issue ranges use a double hyphen
% without spaces, and acronyms in titles are brace-protected.

@book{MeinelDoellnerWeskeetal.2021,
  author    = {Meinel, Christoph and D{\"o}llner, J{\"u}rgen Roland Friedrich and Weske, Mathias and Polze, Andreas and Hirschfeld, Robert and Naumann, Felix and Giese, Holger and Baudisch, Patrick and Friedrich, Tobias and B{\"o}ttinger, Erwin and Lippert, Christoph and D{\"o}rr, Christian and Lehmann, Anja and Renard, Bernhard and Rabl, Tilmann and Uebernickel, Falk and Arnrich, Bert and H{\"o}lzle, Katharina},
  title     = {Proceedings of the {HPI} Research School on Service-oriented Systems Engineering 2020 Fall Retreat},
  number    = {138},
  publisher = {Universit{\"a}tsverlag Potsdam},
  address   = {Potsdam},
  isbn      = {978-3-86956-513-2},
  issn      = {1613-5652},
  doi       = {10.25932/publishup-50413},
  url       = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-504132},
  pages     = {vi, 144},
  year      = {2021},
  abstract  = {Design and Implementation of service-oriented architectures imposes a huge number of research questions from the fields of software engineering, system analysis and modeling, adaptability, and application integration. Component orientation and web services are two approaches for design and realization of complex web-based system. Both approaches allow for dynamic application adaptation as well as integration of enterprise application. Service-Oriented Systems Engineering represents a symbiosis of best practices in object-orientation, component-based development, distributed computing, and business process management. It provides integration of business and IT concerns. The annual Ph.D. Retreat of the Research School provides each member the opportunity to present his/her current state of their research and to give an outline of a prospective Ph.D. thesis. Due to the interdisciplinary structure of the research school, this technical report covers a wide range of topics. These include but are not limited to: Human Computer Interaction and Computer Vision as Service; Service-oriented Geovisualization Systems; Algorithm Engineering for Service-oriented Systems; Modeling and Verification of Self-adaptive Service-oriented Systems; Tools and Methods for Software Engineering in Service-oriented Systems; Security Engineering of Service-based IT Systems; Service-oriented Information Systems; Evolutionary Transition of Enterprise Applications to Service Orientation; Operating System Abstractions for Service-oriented Computing; and Services Specification, Composition, and Enactment.},
  language  = {en},
}

@article{KunftKatsifodimosSchelteretal.2019,
  author    = {Kunft, Andreas and Katsifodimos, Asterios and Schelter, Sebastian and Bress, Sebastian and Rabl, Tilmann and Markl, Volker},
  title     = {An Intermediate Representation for Optimizing Machine Learning Pipelines},
  journal   = {Proceedings of the VLDB Endowment},
  volume    = {12},
  number    = {11},
  publisher = {Association for Computing Machinery},
  address   = {New York},
  issn      = {2150-8097},
  doi       = {10.14778/3342263.3342633},
  pages     = {1553--1567},
  year      = {2019},
  abstract  = {Machine learning (ML) pipelines for model training and validation typically include preprocessing, such as data cleaning and feature engineering, prior to training an ML model. Preprocessing combines relational algebra and user-defined functions (UDFs), while model training uses iterations and linear algebra. Current systems are tailored to either of the two. As a consequence, preprocessing and ML steps are optimized in isolation. To enable holistic optimization of ML training pipelines, we present Lara, a declarative domain-specific language for collections and matrices. Lara's intermediate representation (IR) reflects on the complete program, i.e., UDFs, control flow, and both data types. Two views on the IR enable diverse optimizations. Monads enable operator pushdown and fusion across type and loop boundaries. Combinators provide the semantics of domain-specific operators and optimize data access and cross-validation of ML algorithms. Our experiments on preprocessing pipelines and selected ML algorithms show the effects of our proposed optimizations on dense and sparse data, which achieve speedups of up to an order of magnitude.},
  language  = {en},
}

@article{KaitouaRablMarkl2020,
  author    = {Kaitoua, Abdulrahman and Rabl, Tilmann and Markl, Volker},
  title     = {A distributed data exchange engine for polystores},
  journal   = {Information technology : methods and applications of informatics and information technology},
  volume    = {62},
  number    = {3--4},
  publisher = {De Gruyter},
  address   = {Berlin},
  issn      = {1611-2776},
  doi       = {10.1515/itit-2019-0037},
  pages     = {145--156},
  year      = {2020},
  abstract  = {There is an increasing interest in fusing data from heterogeneous sources. Combining data sources increases the utility of existing datasets, generating new information and creating services of higher quality. A central issue in working with heterogeneous sources is data migration: In order to share and process data in different engines, resource intensive and complex movements and transformations between computing engines, services, and stores are necessary. Muses is a distributed, high-performance data migration engine that is able to interconnect distributed data stores by forwarding, transforming, repartitioning, or broadcasting data among distributed engines' instances in a resource-, cost-, and performance-adaptive manner. As such, it performs seamless information sharing across all participating resources in a standard, modular manner. We show an overall improvement of 30 \% for pipelining jobs across multiple engines, even when we count the overhead of Muses in the execution time. This performance gain implies that Muses can be used to optimise large pipelines that leverage multiple engines.},
  language  = {en},
}

@article{GevayRablBressetal.2022,
  author    = {G{\'e}vay, G{\'a}bor E. and Rabl, Tilmann and Bre{\ss}, Sebastian and Madai-Tahy, Lor{\'a}nd and Quian{\'e}-Ruiz, Jorge-Arnulfo and Markl, Volker},
  title     = {Imperative or functional control flow handling},
  journal   = {SIGMOD record / Association for Computing Machinery, Special Interest Group on Management of Data},
  volume    = {51},
  number    = {1},
  publisher = {Association for Computing Machinery},
  address   = {New York},
  issn      = {0163-5808},
  doi       = {10.1145/3542700.3542715},
  pages     = {60--67},
  year      = {2022},
  abstract  = {Modern data analysis tasks often involve control flow statements, such as the iterations in PageRank and K-means. To achieve scalability, developers usually implement these tasks in distributed dataflow systems, such as Spark and Flink. Designers of such systems have to choose between providing imperative or functional control flow constructs to users. Imperative constructs are easier to use, but functional constructs are easier to compile to an efficient dataflow job. We propose Mitos, a system where control flow is both easy to use and efficient. Mitos relies on an intermediate representation based on the static single assignment form. This allows us to abstract away from specific control flow constructs and treat any imperative control flow uniformly both when building the dataflow job and when coordinating the distributed execution.},
  language  = {en},
}

% Second record with the same journal, volume, issue and pages as the entry
% above, carrying the title with its subtitle. The key has an "a" suffix
% because the unsuffixed key belongs to the entry above.
% NOTE(review): presumably the same article; confirm the DOI against the
% publisher page.
@article{GevayRablBressetal.2022a,
  author    = {G{\'e}vay, G{\'a}bor E. and Rabl, Tilmann and Bre{\ss}, Sebastian and Madai-Tahy, Lor{\'a}nd and Quian{\'e}-Ruiz, Jorge-Arnulfo and Markl, Volker},
  title     = {Imperative or Functional Control Flow Handling: Why not the Best of Both Worlds?},
  journal   = {SIGMOD record},
  volume    = {51},
  number    = {1},
  publisher = {Association for Computing Machinery},
  address   = {New York},
  issn      = {0163-5808},
  doi       = {10.1145/3542700.3542715},
  pages     = {60--67},
  year      = {2022},
  abstract  = {Modern data analysis tasks often involve control flow statements, such as the iterations in PageRank and K-means. To achieve scalability, developers usually implement these tasks in distributed dataflow systems, such as Spark and Flink. Designers of such systems have to choose between providing imperative or functional control flow constructs to users. Imperative constructs are easier to use, but functional constructs are easier to compile to an efficient dataflow job. We propose Mitos, a system where control flow is both easy to use and efficient. Mitos relies on an intermediate representation based on the static single assignment form. This allows us to abstract away from specific control flow constructs and treat any imperative control flow uniformly both when building the dataflow job and when coordinating the distributed execution.},
  language  = {en},
}

@article{DreselerBoissierRabletal.2020,
  author    = {Dreseler, Markus and Boissier, Martin and Rabl, Tilmann and Uflacker, Matthias},
  title     = {Quantifying {TPC-H} choke points and their optimizations},
  journal   = {Proceedings of the VLDB Endowment},
  volume    = {13},
  number    = {8},
  publisher = {Association for Computing Machinery},
  address   = {New York},
  issn      = {2150-8097},
  doi       = {10.14778/3389133.3389138},
  pages     = {1206--1220},
  year      = {2020},
  abstract  = {TPC-H continues to be the most widely used benchmark for relational OLAP systems. It poses a number of challenges, also known as ``choke points'', which database systems have to solve in order to achieve good benchmark results. Examples include joins across multiple tables, correlated subqueries, and correlations within the TPC-H data set. Knowing the impact of such optimizations helps in developing optimizers as well as in interpreting TPC-H results across database systems. This paper provides a systematic analysis of choke points and their optimizations. It complements previous work on TPC-H choke points by providing a quantitative discussion of their relevance. It focuses on eleven choke points where the optimizations are beneficial independently of the database system. Of these, the flattening of subqueries and the placement of predicates have the biggest impact. Three queries (Q2, Q17, and Q21) are strongly influenced by the choice of an efficient query plan; three others (Q1, Q13, and Q18) are less influenced by plan optimizations and more dependent on an efficient execution engine.},
  language  = {en},
}

% Record in the university's secondary-publication series; the journal
% article with the same authors and title follows under the "a"-suffixed key.
@misc{BensonMakaitRabl2021,
  author    = {Benson, Lawrence and Makait, Hendrik and Rabl, Tilmann},
  title     = {Viper},
  series    = {Zweitver{\"o}ffentlichungen der Universit{\"a}t Potsdam : Reihe der Digital Engineering Fakult{\"a}t},
  journal   = {Zweitver{\"o}ffentlichungen der Universit{\"a}t Potsdam : Reihe der Digital Engineering Fakult{\"a}t},
  number    = {9},
  issn      = {2150-8097},
  doi       = {10.25932/publishup-55966},
  url       = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-559664},
  pages     = {15},
  year      = {2021},
  abstract  = {Key-value stores (KVSs) have found wide application in modern software systems. For persistence, their data resides in slow secondary storage, which requires KVSs to employ various techniques to increase their read and write performance from and to the underlying medium. Emerging persistent memory (PMem) technologies offer data persistence at close-to-DRAM speed, making them a promising alternative to classical disk-based storage. However, simply drop-in replacing existing storage with PMem does not yield good results, as block-based access behaves differently in PMem than on disk and ignores PMem's byte addressability, layout, and unique performance characteristics. In this paper, we propose three PMem-specific access patterns and implement them in a hybrid PMem-DRAM KVS called Viper. We employ a DRAM-based hash index and a PMem-aware storage layout to utilize the random-write speed of DRAM and efficient sequential-write performance PMem. Our evaluation shows that Viper significantly outperforms existing KVSs for core KVS operations while providing full data persistence. Moreover, Viper outperforms existing PMem-only, hybrid, and disk-based KVSs by 4-18x for write workloads, while matching or surpassing their get performance.},
  language  = {en},
}

% Journal version; the key has an "a" suffix because the unsuffixed key
% belongs to the misc record above.
@article{BensonMakaitRabl2021a,
  author    = {Benson, Lawrence and Makait, Hendrik and Rabl, Tilmann},
  title     = {Viper},
  journal   = {Proceedings of the VLDB Endowment},
  volume    = {14},
  number    = {9},
  publisher = {Association for Computing Machinery},
  address   = {New York},
  issn      = {2150-8097},
  doi       = {10.14778/3461535.3461543},
  pages     = {1544--1556},
  year      = {2021},
  abstract  = {Key-value stores (KVSs) have found wide application in modern software systems. For persistence, their data resides in slow secondary storage, which requires KVSs to employ various techniques to increase their read and write performance from and to the underlying medium. Emerging persistent memory (PMem) technologies offer data persistence at close-to-DRAM speed, making them a promising alternative to classical disk-based storage. However, simply drop-in replacing existing storage with PMem does not yield good results, as block-based access behaves differently in PMem than on disk and ignores PMem's byte addressability, layout, and unique performance characteristics. In this paper, we propose three PMem-specific access patterns and implement them in a hybrid PMem-DRAM KVS called Viper. We employ a DRAM-based hash index and a PMem-aware storage layout to utilize the random-write speed of DRAM and efficient sequential-write performance PMem. Our evaluation shows that Viper significantly outperforms existing KVSs for core KVS operations while providing full data persistence. Moreover, Viper outperforms existing PMem-only, hybrid, and disk-based KVSs by 4-18x for write workloads, while matching or surpassing their get performance.},
  language  = {en},
}