@phdthesis{Dreseler2022, author = {Dreseler, Markus}, title = {Automatic tiering for in-memory database systems}, doi = {10.25932/publishup-55825}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-558253}, school = {Universit{\"a}t Potsdam}, pages = {vii, 143}, year = {2022}, abstract = {A decade ago, it became feasible to store multi-terabyte databases in main memory. These in-memory databases (IMDBs) profit from DRAM's low latency and high throughput as well as from the removal of costly abstractions used in disk-based systems, such as the buffer cache. However, as the DRAM technology approaches physical limits, scaling these databases becomes difficult. Non-volatile memory (NVM) addresses this challenge. This new type of memory is persistent, has more capacity than DRAM (4x), and does not suffer from its density-inhibiting limitations. Yet, as NVM has a higher latency (5-15x) and a lower throughput (0.35x), it cannot fully replace DRAM. IMDBs thus need to navigate the trade-off between the two memory tiers. We present a solution to this optimization problem. Leveraging information about access frequencies and patterns, our solution utilizes NVM's additional capacity while minimizing the associated access costs. Unlike buffer cache-based implementations, our tiering abstraction does not add any costs when reading data from DRAM. As such, it can act as a drop-in replacement for existing IMDBs. Our contributions are as follows: (1) As the foundation for our research, we present Hyrise, an open-source, columnar IMDB that we re-engineered and re-wrote from scratch. Hyrise enables realistic end-to-end benchmarks of SQL workloads and offers query performance which is competitive with other research and commercial systems. At the same time, Hyrise is easy to understand and modify as repeatedly demonstrated by its uses in research and teaching. (2) We present a novel memory management framework for different memory and storage tiers. By encapsulating the allocation and access methods of these tiers, we enable existing data structures to be stored on different tiers with no modifications to their implementation. Besides DRAM and NVM, we also support and evaluate SSDs and have made provisions for upcoming technologies such as disaggregated memory. (3) To identify the parts of the data that can be moved to (s)lower tiers with little performance impact, we present a tracking method that identifies access skew both in the row and column dimensions and that detects patterns within consecutive accesses. Unlike existing methods that have substantial associated costs, our access counters exhibit no identifiable overhead in standard benchmarks despite their increased accuracy. (4) Finally, we introduce a tiering algorithm that optimizes the data placement for a given memory budget. In the TPC-H benchmark, this allows us to move 90\% of the data to NVM while the throughput is reduced by only 10.8\% and the query latency is increased by 11.6\%. With this, we outperform approaches that ignore the workload's access skew and access patterns and increase the query latency by 20\% or more. Individually, our contributions provide novel approaches to current challenges in systems engineering and database research. Combining them allows IMDBs to scale past the limits of DRAM while continuing to profit from the benefits of in-memory computing.}, language = {en} } @phdthesis{Haarmann2022, author = {Haarmann, Stephan}, title = {WICKR: A Joint Semantics for Flexible Processes and Data}, doi = {10.25932/publishup-54613}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-546137}, school = {Universit{\"a}t Potsdam}, pages = {xvii, 191}, year = {2022}, abstract = {Knowledge-intensive business processes are flexible and data-driven. Therefore, traditional process modeling languages do not meet their requirements: These languages focus on highly structured processes in which data plays a minor role. As a result, process-oriented information systems fail to assist knowledge workers on executing their processes. We propose a novel case management approach that combines flexible activity-centric processes with data models, and we provide a joint semantics using colored Petri nets. The approach is suited to model, verify, and enact knowledge-intensive processes and can aid the development of information systems that support knowledge work. Knowledge-intensive processes are human-centered, multi-variant, and data-driven. Typical domains include healthcare, insurances, and law. The processes cannot be fully modeled, since the underlying knowledge is too vast and changes too quickly. Thus, models for knowledge-intensive processes are necessarily underspecified. In fact, a case emerges gradually as knowledge workers make informed decisions. Knowledge work imposes special requirements on modeling and managing respective processes. They include flexibility during design and execution, ad-hoc adaption to unforeseen situations, and the integration of behavior and data. However, the predominantly used process modeling languages (e.g., BPMN) are unsuited for this task. Therefore, novel modeling languages have been proposed. Many of them focus on activities' data requirements and declarative constraints rather than imperative control flow. Fragment-Based Case Management, for example, combines activity-centric imperative process fragments with declarative data requirements. At runtime, fragments can be combined dynamically, and new ones can be added. Yet, no integrated semantics for flexible activity-centric process models and data models exists. In this thesis, Wickr, a novel case modeling approach extending fragment-based Case Management, is presented. It supports batch processing of data, sharing data among cases, and a full-fledged data model with associations and multiplicity constraints. We develop a translational semantics for Wickr targeting (colored) Petri nets. The semantics assert that a case adheres to the constraints in both the process fragments and the data models. Among other things, multiplicity constraints must not be violated. Furthermore, the semantics are extended to multiple cases that operate on shared data. Wickr shows that the data structure may reflect process behavior and vice versa. Based on its semantics, prototypes for executing and verifying case models showcase the feasibility of Wickr. Its applicability to knowledge-intensive and to data-centric processes is evaluated using well-known requirements from related work.}, language = {en} } @phdthesis{Melnichenko2022, author = {Melnichenko, Anna}, title = {Selfish Creation of Realistic Networks}, doi = {10.25932/publishup-54814}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-548141}, school = {Universit{\"a}t Potsdam}, pages = {xi, 175}, year = {2022}, abstract = {Complex networks like the Internet or social networks are fundamental parts of our everyday lives. It is essential to understand their structural properties and how these networks are formed. A game-theoretic approach to network design problems has become of high interest in the last decades. The reason is that many real-world networks are the outcomes of decentralized strategic behavior of independent agents without central coordination. Fabrikant, Luthra, Maneva, Papadimitriou, and Schenker proposed a game-theoretic model aiming to explain the formation of the Internet-like networks. In this model, called the Network Creation Game, agents are associated with nodes of a network. Each agent seeks to maximize her centrality by establishing costly connections to other agents. The model is relatively simple but shows a high potential in modeling complex real-world networks. In this thesis, we contribute to the line of research on variants of the Network Creation Games. Inspired by real-world networks, we propose and analyze several novel network creation models. We aim to understand the impact of certain realistic modeling assumptions on the structure of the created networks and the involved agents' behavior. The first natural additional objective that we consider is the network's robustness. We consider a game where the agents seek to maximize their centrality and, at the same time, the stability of the created network against random edge failure. Our second point of interest is a model that incorporates an underlying geometry. We consider a network creation model where the agents correspond to points in some underlying space and where edge lengths are equal to the distances between the endpoints in that space. The geometric setting captures many physical real-world networks like transport networks and fiber-optic communication networks. We focus on the formation of social networks and consider two models that incorporate particular realistic behavior observed in real-world networks. In the first model, we embed the anti-preferential attachment link formation. Namely, we assume that the cost of the connection is proportional to the popularity of the targeted agent. Our second model is based on the observation that the probability of two persons to connect is inversely proportional to the length of their shortest chain of mutual acquaintances. For each of the four models above, we provide a complete game-theoretical analysis. In particular, we focus on distinctive structural properties of the equilibria, the hardness of computing a best response, the quality of equilibria in comparison to the centrally designed socially optimal networks. We also analyze the game dynamics, i.e., the process of sequential strategic improvements by the agents, and analyze the convergence to an equilibrium state and its properties.}, language = {en} } @phdthesis{Plauth2022, author = {Plauth, Max Frederik}, title = {Improving the Accessibility of Heterogeneous System Resources for Application Developers using Programming Abstractions}, doi = {10.25932/publishup-55811}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-558118}, school = {Universit{\"a}t Potsdam}, pages = {ix, 133}, year = {2022}, abstract = {The heterogeneity of today's state-of-the-art computer architectures is confronting application developers with an immense degree of complexity which results from two major challenges. First, developers need to acquire profound knowledge about the programming models or the interaction models associated with each type of heterogeneous system resource to make efficient use thereof. Second, developers must take into account that heterogeneous system resources always need to exchange data with each other in order to work on a problem together. However, this data exchange is always associated with a certain amount of overhead, which is why the amounts of data exchanged should be kept as low as possible. This thesis proposes three programming abstractions to lessen the burdens imposed by these major challenges with the goal of making heterogeneous system resources accessible to a wider range of application developers. The lib842 compression library provides the first method for accessing the compression and decompression facilities of the NX-842 on-chip compression accelerator available in IBM Power CPUs from user space applications running on Linux. Addressing application development of scale-out GPU workloads, the CloudCL framework makes the resources of GPU clusters more accessible by hiding many aspects of distributed computing while enabling application developers to focus on the aspects of the data parallel programming model associated with GPUs. Furthermore, CloudCL is augmented with transparent data compression facilities based on the lib842 library in order to improve the efficiency of data transfers among cluster nodes. The improved data transfer efficiency provided by the integration of transparent data compression yields performance improvements ranging between 1.11x and 2.07x across four data-intensive scale-out GPU workloads. To investigate the impact of programming abstractions for data placement in NUMA systems, a comprehensive evaluation of the PGASUS framework for NUMA-aware C++ application development is conducted. On a wide range of test systems, the evaluation demonstrates that PGASUS does not only improve the developer experience across all workloads, but that it is also capable of outperforming NUMA-agnostic implementations with average performance improvements of 1.56x. Based on these programming abstractions, this thesis demonstrates that by providing a sufficient degree of abstraction, the accessibility of heterogeneous system resources can be improved for application developers without occluding performance-critical properties of the underlying hardware.}, language = {en} }