@phdthesis{Bazhenova2018, author = {Bazhenova, Ekaterina}, title = {Discovery of Decision Models Complementary to Process Models}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-410020}, school = {Universit{\"a}t Potsdam}, year = {2018}, abstract = {Business process management is an acknowledged asset for running an organization in a productive and sustainable way. One of the most important aspects of business process management, occurring on a daily basis at all levels, is decision making. In recent years, a number of decision management frameworks have appeared in addition to existing business process management systems. More recently, Decision Model and Notation (DMN) was developed by the OMG consortium with the aim of complementing the widely used Business Process Model and Notation (BPMN). One of the reasons for the emergence of DMN is the increasing interest in the evolving paradigm known as the separation of concerns. This paradigm states that modeling decisions complementary to processes reduces process complexity by externalizing decision logic from process models and importing it into a dedicated decision model. Such an approach increases the agility of model design and execution. This provides organizations with the flexibility to adapt to the ever increasing rapid and dynamic changes in the business ecosystem. The research gap, identified by us, is that the separation of concerns, recommended by DMN, prescribes the externalization of the decision logic of process models in one or more separate decision models, but it does not specify this can be achieved. The goal of this thesis is to overcome the presented gap by developing a framework for discovering decision models in a semi-automated way from information about existing process decision making. Thus, in this thesis we develop methodologies to extract decision models from: (1) control flow and data of process models that exist in enterprises; and (2) from event logs recorded by enterprise information systems, encapsulating day-to-day operations. Furthermore, we provide an extension of the methodologies to discover decision models from event logs enriched with fuzziness, a tool dealing with partial knowledge of the process execution information. All the proposed techniques are implemented and evaluated in case studies using real-life and synthetic process models and event logs. The evaluation of these case studies shows that the proposed methodologies provide valid and accurate output decision models that can serve as blueprints for executing decisions complementary to process models. Thus, these methodologies have applicability in the real world and they can be used, for example, for compliance checks, among other uses, which could improve the organization's decision making and hence it's overall performance.}, language = {en} } @phdthesis{Vogel2018, author = {Vogel, Thomas}, title = {Model-driven engineering of self-adaptive software}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-409755}, school = {Universit{\"a}t Potsdam}, pages = {xvi, 357}, year = {2018}, abstract = {The development of self-adaptive software requires the engineering of an adaptation engine that controls the underlying adaptable software by a feedback loop. State-of-the-art approaches prescribe the feedback loop in terms of numbers, how the activities (e.g., monitor, analyze, plan, and execute (MAPE)) and the knowledge are structured to a feedback loop, and the type of knowledge. Moreover, the feedback loop is usually hidden in the implementation or framework and therefore not visible in the architectural design. Additionally, an adaptation engine often employs runtime models that either represent the adaptable software or capture strategic knowledge such as reconfiguration strategies. State-of-the-art approaches do not systematically address the interplay of such runtime models, which would otherwise allow developers to freely design the entire feedback loop. This thesis presents ExecUtable RuntimE MegAmodels (EUREMA), an integrated model-driven engineering (MDE) solution that rigorously uses models for engineering feedback loops. EUREMA provides a domain-specific modeling language to specify and an interpreter to execute feedback loops. The language allows developers to freely design a feedback loop concerning the activities and runtime models (knowledge) as well as the number of feedback loops. It further supports structuring the feedback loops in the adaptation engine that follows a layered architectural style. Thus, EUREMA makes the feedback loops explicit in the design and enables developers to reason about design decisions. To address the interplay of runtime models, we propose the concept of a runtime megamodel, which is a runtime model that contains other runtime models as well as activities (e.g., MAPE) working on the contained models. This concept is the underlying principle of EUREMA. The resulting EUREMA (mega)models are kept alive at runtime and they are directly executed by the EUREMA interpreter to run the feedback loops. Interpretation provides the flexibility to dynamically adapt a feedback loop. In this context, EUREMA supports engineering self-adaptive software in which feedback loops run independently or in a coordinated fashion within the same layer as well as on top of each other in different layers of the adaptation engine. Moreover, we consider preliminary means to evolve self-adaptive software by providing a maintenance interface to the adaptation engine. This thesis discusses in detail EUREMA by applying it to different scenarios such as single, multiple, and stacked feedback loops for self-repairing and self-optimizing the mRUBiS application. Moreover, it investigates the design and expressiveness of EUREMA, reports on experiments with a running system (mRUBiS) and with alternative solutions, and assesses EUREMA with respect to quality attributes such as performance and scalability. The conducted evaluation provides evidence that EUREMA as an integrated and open MDE approach for engineering self-adaptive software seamlessly integrates the development and runtime environments using the same formalism to specify and execute feedback loops, supports the dynamic adaptation of feedback loops in layered architectures, and achieves an efficient execution of feedback loops by leveraging incrementality.}, language = {en} } @inproceedings{OPUS4-40678, title = {HPI Future SOC Lab}, editor = {Meinel, Christoph and Polze, Andreas and Oswald, Gerhard and Strotmann, Rolf and Seibold, Ulrich and Schulzki, Bernhard}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-406787}, pages = {iii, 180}, year = {2016}, abstract = {The "HPI Future SOC Lab" is a cooperation of the Hasso Plattner Institute (HPI) and industrial partners. Its mission is to enable and promote exchange and interaction between the research community and the industrial partners. The HPI Future SOC Lab provides researchers with free of charge access to a complete infrastructure of state of the art hard and software. This infrastructure includes components, which might be too expensive for an ordinary research environment, such as servers with up to 64 cores and 2 TB main memory. The offerings address researchers particularly from but not limited to the areas of computer science and business information systems. Main areas of research include cloud computing, parallelization, and In-Memory technologies. This technical report presents results of research projects executed in 2016. Selected projects have presented their results on April 5th and November 3th 2016 at the Future SOC Lab Day events.}, language = {en} } @phdthesis{Pufahl2018, author = {Pufahl, Luise}, title = {Modeling and executing batch activities in business processes}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-408013}, school = {Universit{\"a}t Potsdam}, pages = {xix, 163}, year = {2018}, abstract = {Business process automation improves organizations' efficiency to perform work. Therefore, a business process is first documented as a process model which then serves as blueprint for a number of process instances representing the execution of specific business cases. In existing business process management systems, process instances run independently from each other. However, in practice, instances are also collected in groups at certain process activities for a combined execution to improve the process performance. Currently, this so-called batch processing is executed manually or supported by external software. Only few research proposals exist to explicitly represent and execute batch processing needs in business process models. These works also lack a comprehensive understanding of requirements. This thesis addresses the described issues by providing a basic concept, called batch activity. It allows an explicit representation of batch processing configurations in process models and provides a corresponding execution semantics, thereby easing automation. The batch activity groups different process instances based on their data context and can synchronize their execution over one or as well multiple process activities. The concept is conceived based on a requirements analysis considering existing literature on batch processing from different domains and industry examples. Further, this thesis provides two extensions: First, a flexible batch configuration concept, based on event processing techniques, is introduced to allow run time adaptations of batch configurations. Second, a concept for collecting and batching activity instances of multiple different process models is given. Thereby, the batch configuration is centrally defined, independently of the process models, which is especially beneficial for organizations with large process model collections. This thesis provides a technical evaluation as well as a validation of the presented concepts. A prototypical implementation in an existing open-source BPMS shows that with a few extensions, batch processing is enabled. Further, it demonstrates that the consolidated view of several work items in one user form can improve work efficiency. The validation, in which the batch activity concept is applied to different use cases in a simulated environment, implies cost-savings for business processes when a suitable batch configuration is used. For the validation, an extensible business process simulator was developed. It enables process designers to study the influence of a batch activity in a process with regards to its performance.}, language = {en} } @phdthesis{Che2017, author = {Che, Xiaoyin}, title = {E-lecture material enhancement based on automatic multimedia analysis}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-408224}, school = {Universit{\"a}t Potsdam}, pages = {xviii, 148}, year = {2017}, abstract = {In this era of high-speed informatization and globalization, online education is no longer an exquisite concept in the ivory tower, but a rapidly developing industry closely relevant to people's daily lives. Numerous lectures are recorded in form of multimedia data, uploaded to the Internet and made publicly accessible from anywhere in this world. These lectures are generally addressed as e-lectures. In recent year, a new popular form of e-lectures, the Massive Open Online Courses (MOOCs), boosts the growth of online education industry and somehow turns "learning online" into a fashion. As an e-learning provider, besides to keep improving the quality of e-lecture content, to provide better learning environment for online learners is also a highly important task. This task can be preceded in various ways, and one of them is to enhance and upgrade the learning materials provided: e-lectures could be more than videos. Moreover, this process of enhancement or upgrading should be done automatically, without giving extra burdens to the lecturers or teaching teams, and this is the aim of this thesis. The first part of this thesis is an integrated framework of multi-lingual subtitles production, which can help online learners penetrate the language barrier. The framework consists of Automatic Speech Recognition (ASR), Sentence Boundary Detection (SBD) and Machine Translation (MT), among which the proposed SBD solution is major technical contribution, building on Deep Neural Network (DNN) and Word Vector (WV) and achieving state-of-the-art performance. Besides, a quantitative evaluation with dozens of volunteers is also introduced to measure how these auto-generated subtitles could actually help in context of e-lectures. Secondly, a technical solution "TOG" (Tree-Structure Outline Generation) is proposed to extract textual content from the displaying slides recorded in video and re-organize them into a hierarchical lecture outline, which may serve in multiple functions, such like preview, navigation and retrieval. TOG runs adaptively and can be roughly divided into intra-slide and inter-slides phases. Table detection and lecture video segmentation can be implemented as sub- or post-application in these two phases respectively. Evaluation on diverse e-lectures shows that all the outlines, tables and segments achieved are trustworthily accurate. Based on the subtitles and outlines previously created, lecture videos can be further split into sentence units and slide-based segment units. A lecture highlighting process is further applied on these units, in order to capture and mark the most important parts within the corresponding lecture, just as what people do with a pen when reading paper books. Sentence-level highlighting depends on the acoustic analysis on the audio track, while segment-level highlighting focuses on exploring clues from the statistical information of related transcripts and slide content. Both objective and subjective evaluations prove that the proposed lecture highlighting solution is with decent precision and welcomed by users. All above enhanced e-lecture materials have been already implemented in actual use or made available for implementation by convenient interfaces.}, language = {en} } @phdthesis{Papenbrock2017, author = {Papenbrock, Thorsten}, title = {Data profiling - efficient discovery of dependencies}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-406705}, school = {Universit{\"a}t Potsdam}, pages = {viii, ii, 141}, year = {2017}, abstract = {Data profiling is the computer science discipline of analyzing a given dataset for its metadata. The types of metadata range from basic statistics, such as tuple counts, column aggregations, and value distributions, to much more complex structures, in particular inclusion dependencies (INDs), unique column combinations (UCCs), and functional dependencies (FDs). If present, these statistics and structures serve to efficiently store, query, change, and understand the data. Most datasets, however, do not provide their metadata explicitly so that data scientists need to profile them. While basic statistics are relatively easy to calculate, more complex structures present difficult, mostly NP-complete discovery tasks; even with good domain knowledge, it is hardly possible to detect them manually. Therefore, various profiling algorithms have been developed to automate the discovery. None of them, however, can process datasets of typical real-world size, because their resource consumptions and/or execution times exceed effective limits. In this thesis, we propose novel profiling algorithms that automatically discover the three most popular types of complex metadata, namely INDs, UCCs, and FDs, which all describe different kinds of key dependencies. The task is to extract all valid occurrences from a given relational instance. The three algorithms build upon known techniques from related work and complement them with algorithmic paradigms, such as divide \& conquer, hybrid search, progressivity, memory sensitivity, parallelization, and additional pruning to greatly improve upon current limitations. Our experiments show that the proposed algorithms are orders of magnitude faster than related work. They are, in particular, now able to process datasets of real-world, i.e., multiple gigabytes size with reasonable memory and time consumption. Due to the importance of data profiling in practice, industry has built various profiling tools to support data scientists in their quest for metadata. These tools provide good support for basic statistics and they are also able to validate individual dependencies, but they lack real discovery features even though some fundamental discovery techniques are known for more than 15 years. To close this gap, we developed Metanome, an extensible profiling platform that incorporates not only our own algorithms but also many further algorithms from other researchers. With Metanome, we make our research accessible to all data scientists and IT-professionals that are tasked with data profiling. Besides the actual metadata discovery, the platform also offers support for the ranking and visualization of metadata result sets. Being able to discover the entire set of syntactically valid metadata naturally introduces the subsequent task of extracting only the semantically meaningful parts. This is challenge, because the complete metadata results are surprisingly large (sometimes larger than the datasets itself) and judging their use case dependent semantic relevance is difficult. To show that the completeness of these metadata sets is extremely valuable for their usage, we finally exemplify the efficient processing and effective assessment of functional dependencies for the use case of schema normalization.}, language = {en} } @misc{GieseHenklerHirsch2017, author = {Giese, Holger and Henkler, Stefan and Hirsch, Martin}, title = {A multi-paradigm approach supporting the modular execution of reconfigurable hybrid systems}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-402896}, pages = {34}, year = {2017}, abstract = {Advanced mechatronic systems have to integrate existing technologies from mechanical, electrical and software engineering. They must be able to adapt their structure and behavior at runtime by reconfiguration to react flexibly to changes in the environment. Therefore, a tight integration of structural and behavioral models of the different domains is required. This integration results in complex reconfigurable hybrid systems, the execution logic of which cannot be addressed directly with existing standard modeling, simulation, and code-generation techniques. We present in this paper how our component-based approach for reconfigurable mechatronic systems, M ECHATRONIC UML, efficiently handles the complex interplay of discrete behavior and continuous behavior in a modular manner. In addition, its extension to even more flexible reconfiguration cases is presented.}, language = {en} } @misc{ChujfiMeinel2017, author = {Chujfi, Salim and Meinel, Christoph}, title = {Patterns to explore cognitive preferences and potential collective intelligence empathy for processing knowledge in virtual settings}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-401789}, pages = {16}, year = {2017}, abstract = {Organizations continue building virtual working teams (Teleworkers) to become more dynamic as part of their strategic innovation, with great benefits to individuals, business and society. However, during such transformations it is important to note that effective knowledge communication is particularly difficult in distributed environments as well as in non-interactive settings, because the interlocutors cannot use gestures or mimicry and have to adapt their expressions without receiving any feedback, which may affect the creation of tacit knowledge. Collective Intelligence appears to be an encouraging alternative for creating knowledge. However, in this scenario it faces an important goal to be achieved, as the degree of ability of two or more individuals increases with the need to overcome barriers through the aggregation of separately processed information, whereby all actors follow similar conditions to participate in the collective. Geographically distributed organizations have the great challenge of managing people's knowledge, not only to keep operations running, but also to promote innovation within the organization in the creation of new knowledge. The management of knowledge from Collective Intelligence represents a big difference from traditional methods of information allocation, since managing Collective Intelligence poses new requirements. For instance, semantic analysis has to merge information, coming both from the content itself and the social/individual context, and in addition, the social dynamics that emerge online have to be taken into account. This study analyses how knowledge-based organizations working with decentralized staff may need to consider the cognitive styles and social behaviors of individuals participating in their programs to effectively manage knowledge in virtual settings. It also proposes assessment taxonomies to analyze online comportments at the levels of the individual and community, in order to successfully identify characteristics to help evaluate higher effectiveness of communication. We aim at modeling measurement patterns to identify effective ways of interaction of individuals, taking into consideration their cognitive and social behaviors.}, language = {en} } @phdthesis{Krohmer2016, author = {Krohmer, Anton}, title = {Structures \& algorithms in hyperbolic random graphs}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-395974}, school = {Universit{\"a}t Potsdam}, pages = {xii, 102}, year = {2016}, abstract = {Complex networks are ubiquitous in nature and society. They appear in vastly different domains, for instance as social networks, biological interactions or communication networks. Yet in spite of their different origins, these networks share many structural characteristics. For instance, their degree distribution typically follows a power law. This means that the fraction of vertices of degree k is proportional to k^(-β) for some constant β; making these networks highly inhomogeneous. Furthermore, they also typically have high clustering, meaning that links between two nodes are more likely to appear if they have a neighbor in common. To mathematically study the behavior of such networks, they are often modeled as random graphs. Many of the popular models like inhomogeneous random graphs or Preferential Attachment excel at producing a power law degree distribution. Clustering, on the other hand, is in these models either not present or artificially enforced. Hyperbolic random graphs bridge this gap by assuming an underlying geometry to the graph: Each vertex is assigned coordinates in the hyperbolic plane, and two vertices are connected if they are nearby. Clustering then emerges as a natural consequence: Two nodes joined by an edge are close by and therefore have many neighbors in common. On the other hand, the exponential expansion of space in the hyperbolic plane naturally produces a power law degree sequence. Due to the hyperbolic geometry, however, rigorous mathematical treatment of this model can quickly become mathematically challenging. In this thesis, we improve upon the understanding of hyperbolic random graphs by studying its structural and algorithmical properties. Our main contribution is threefold. First, we analyze the emergence of cliques in this model. We find that whenever the power law exponent β is 2 < β < 3, there exists a clique of polynomial size in n. On the other hand, for β >= 3, the size of the largest clique is logarithmic; which severely contrasts previous models with a constant size clique in this case. We also provide efficient algorithms for finding cliques if the hyperbolic node coordinates are known. Second, we analyze the diameter, i. e., the longest shortest path in the graph. We find that it is of order O(polylog(n)) if 2 < β < 3 and O(logn) if β > 3. To complement these findings, we also show that the diameter is of order at least Ω(logn). Third, we provide an algorithm for embedding a real-world graph into the hyperbolic plane using only its graph structure. To ensure good quality of the embedding, we perform extensive computational experiments on generated hyperbolic random graphs. Further, as a proof of concept, we embed the Amazon product recommendation network and observe that products from the same category are mapped close together.}, language = {en} }