@phdthesis{Abedjan2014, author = {Abedjan, Ziawasch}, title = {Improving {RDF} data with data mining}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-71334}, school = {Universit{\"a}t Potsdam}, year = {2014}, abstract = {Linked Open Data (LOD) comprises very many and often large public data sets and knowledge bases. Those datasets are mostly presented in the RDF triple structure of subject, predicate, and object, where each triple represents a statement or fact. Unfortunately, the heterogeneity of available open data requires significant integration steps before it can be used in applications. Meta information, such as ontological definitions and exact range definitions of predicates, are desirable and ideally provided by an ontology. However in the context of LOD, ontologies are often incomplete or simply not available. Thus, it is useful to automatically generate meta information, such as ontological dependencies, range definitions, and topical classifications. Association rule mining, which was originally applied for sales analysis on transactional databases, is a promising and novel technique to explore such data. We designed an adaptation of this technique for mining RDF data and introduce the concept of "mining configurations", which allows us to mine RDF data sets in various ways. Different configurations enable us to identify schema and value dependencies that in combination result in interesting use cases. To this end, we present rule-based approaches for auto-completion, data enrichment, ontology improvement, and query relaxation. Auto-completion remedies the problem of inconsistent ontology usage, providing an editing user with a sorted list of commonly used predicates. A combination of different configurations extends this approach to create completely new facts for a knowledge base. 
We present two approaches for fact generation, a user-based approach where a user selects the entity to be amended with new facts and a data-driven approach where an algorithm discovers entities that have to be amended with missing facts. As knowledge bases constantly grow and evolve, another approach to improve the usage of RDF data is to improve existing ontologies. Here, we present an association rule based approach to reconcile ontology and data. Interlacing different mining configurations, we infer an algorithm to discover synonymously used predicates. Those predicates can be used to expand query results and to support users during query formulation. We provide a wide range of experiments on real world datasets for each use case. The experiments and evaluations show the added value of association rule mining for the integration and usability of RDF data and confirm the appropriateness of our mining configuration methodology.}, language = {en} } @phdthesis{Awad2010, author = {Awad, Ahmed Mahmoud Hany Aly}, title = {A compliance management framework for business process models}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-49222}, school = {Universit{\"a}t Potsdam}, year = {2010}, abstract = {Companies develop process models to explicitly describe their business operations. At the same time, business operations, business processes, must adhere to various types of compliance requirements. Regulations, e.g., Sarbanes-Oxley Act of 2002, internal policies, best practices are just a few sources of compliance requirements. In some cases, non-adherence to compliance requirements makes the organization subject to legal punishment. In other cases, non-adherence to compliance leads to loss of competitive advantage and thus loss of market share. Unlike the classical domain-independent behavioral correctness of business processes, compliance requirements are domain-specific. Moreover, compliance requirements change over time. 
New requirements might appear due to change in laws and adoption of new policies. Compliance requirements are offered or enforced by different entities that have different objectives behind these requirements. Finally, compliance requirements might affect different aspects of business processes, e.g., control flow and data flow. As a result, it is infeasible to hard-code compliance checks in tools. Rather, a repeatable process of modeling compliance rules and checking them against business processes automatically is needed. This thesis provides a formal approach to support process design-time compliance checking. Using visual patterns, it is possible to model compliance requirements concerning control flow, data flow and conditional flow rules. Each pattern is mapped into a temporal logic formula. The thesis addresses the problem of consistency checking among various compliance requirements, as they might stem from divergent sources. Also, the thesis contributes to automatically check compliance requirements against process models using model checking. We show that extra domain knowledge, other than expressed in compliance rules, is needed to reach correct decisions. In case of violations, we are able to provide a useful feedback to the user. The feedback is in the form of parts of the process model whose execution causes the violation. In some cases, our approach is capable of providing automated remedy of the violation.}, language = {en} } @phdthesis{Baier2015, author = {Baier, Thomas}, title = {Matching events and activities}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-84548}, school = {Universit{\"a}t Potsdam}, pages = {xxii, 213}, year = {2015}, abstract = {Nowadays, business processes are increasingly supported by IT services that produce massive amounts of event data during process execution. Aiming at a better process understanding and improvement, this event data can be used to analyze processes using process mining techniques. 
Process models can be automatically discovered and the execution can be checked for conformance to specified behavior. Moreover, existing process models can be enhanced and annotated with valuable information, for example for performance analysis. While the maturity of process mining algorithms is increasing and more tools are entering the market, process mining projects still face the problem of different levels of abstraction when comparing events with modeled business activities. Mapping the recorded events to activities of a given process model is essential for conformance checking, annotation and understanding of process discovery results. Current approaches try to abstract from events in an automated way that does not capture the required domain knowledge to fit business activities. Such techniques can be a good way to quickly reduce complexity in process discovery. Yet, they fail to enable techniques like conformance checking or model annotation, and potentially create misleading process discovery results by not using the known business terminology. In this thesis, we develop approaches that abstract an event log to the same level that is needed by the business. Typically, this abstraction level is defined by a given process model. Thus, the goal of this thesis is to match events from an event log to activities in a given process model. To accomplish this goal, behavioral and linguistic aspects of process models and event logs as well as domain knowledge captured in existing process documentation are taken into account to build semiautomatic matching approaches. The approaches establish a pre-processing for every available process mining technique that produces or annotates a process model, thereby reducing the manual effort for process analysts. While each of the presented approaches can be used in isolation, we also introduce a general framework for the integration of different matching approaches. 
The approaches have been evaluated in case studies with industry and using a large industry process model collection and simulated event logs. The evaluation demonstrates the effectiveness and efficiency of the approaches and their robustness towards nonconforming execution logs.}, language = {en} } @phdthesis{Bauckmann2013, author = {Bauckmann, Jana}, title = {Dependency discovery for data integration}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-66645}, school = {Universit{\"a}t Potsdam}, year = {2013}, abstract = {Data integration aims to combine data of different sources and to provide users with a unified view on these data. This task is as challenging as valuable. In this thesis we propose algorithms for dependency discovery to provide necessary information for data integration. We focus on inclusion dependencies (INDs) in general and a special form named conditional inclusion dependencies (CINDs): (i) INDs enable the discovery of structure in a given schema. (ii) INDs and CINDs support the discovery of cross-references or links between schemas. An IND "A in B" simply states that all values of attribute A are included in the set of values of attribute B. We propose an algorithm that discovers all inclusion dependencies in a relational data source. The challenge of this task is the complexity of testing all attribute pairs and further of comparing all of each attribute pair's values. The complexity of existing approaches depends on the number of attribute pairs, while ours depends only on the number of attributes. Thus, our algorithm enables to profile entirely unknown data sources with large schemas by discovering all INDs. Further, we provide an approach to extract foreign keys from the identified INDs. 
We extend our IND discovery algorithm to also find three special types of INDs: (i) Composite INDs, such as "AB in CD", (ii) approximate INDs that allow a certain amount of values of A to be not included in B, and (iii) prefix and suffix INDs that represent special cross-references between schemas. Conditional inclusion dependencies are inclusion dependencies with a limited scope defined by conditions over several attributes. Only the matching part of the instance must adhere to the dependency. We generalize the definition of CINDs distinguishing covering and completeness conditions and define quality measures for conditions. We propose efficient algorithms that identify covering and completeness conditions conforming to given quality thresholds. The challenge for this task is twofold: (i) Which (and how many) attributes should be used for the conditions? (ii) Which attribute values should be chosen for the conditions? Previous approaches rely on pre-selected condition attributes or can only discover conditions applying to quality thresholds of 100\%. Our approaches were motivated by two application domains: data integration in the life sciences and link discovery for linked open data. We show the efficiency and the benefits of our approaches for use cases in these domains.}, language = {en} } @phdthesis{Becker2013, author = {Becker, Basil}, title = {Architectural modelling and verification of open service-oriented systems of systems}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-70158}, school = {Universit{\"a}t Potsdam}, year = {2013}, abstract = {Systems of Systems (SoS) have received a lot of attention recently. In this thesis we will focus on SoS that are built atop the techniques of Service-Oriented Architectures and thus combine the benefits and challenges of both paradigms. For this thesis we will understand SoS as ensembles of single autonomous systems that are integrated to a larger system, the SoS. 
The interesting fact about these systems is that the previously isolated systems are still maintained, improved and developed on their own. Structural dynamics is an issue in SoS, as at every point in time systems can join and leave the ensemble. This and the fact that the cooperation among the constituent systems is not necessarily observable means that we will consider these systems as open systems. Of course, the system has a clear boundary at each point in time, but this can only be identified by halting the complete SoS. However, halting a system of that size is practically impossible. Often SoS are combinations of software systems and physical systems. Hence a failure in the software system can have a serious physical impact which makes an SoS of this kind easily a safety-critical system. The contribution of this thesis is a modelling approach that extends OMG's SoaML and basically relies on collaborations and roles as an abstraction layer above the components. This will allow us to describe SoS at an architectural level. We will also give a formal semantics for our modelling approach which employs hybrid graph-transformation systems. The modelling approach is accompanied by a modular verification scheme that will be able to cope with the complexity constraints implied by the SoS' structural dynamics and size. Building such autonomous systems as SoS without evolution at the architectural level --- i.e., adding and removing of components and services --- is inadequate. 
Therefore our approach directly supports the modelling and verification of evolution.}, language = {en} } @phdthesis{Berg2013, author = {Berg, Gregor}, title = {Virtual prototypes for the model-based elicitation and validation of collaborative scenarios}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-69729}, school = {Universit{\"a}t Potsdam}, year = {2013}, abstract = {Requirements engineers have to elicit, document, and validate how stakeholders act and interact to achieve their common goals in collaborative scenarios. Only after gathering all information concerning who interacts with whom to do what and why, can a software system be designed and realized which supports the stakeholders to do their work. To capture and structure requirements of different (groups of) stakeholders, scenario-based approaches have been widely used and investigated. Still, the elicitation and validation of requirements covering collaborative scenarios remains complicated, since the required information is highly intertwined, fragmented, and distributed over several stakeholders. Hence, it can only be elicited and validated collaboratively. In times of globally distributed companies, scheduling and conducting workshops with groups of stakeholders is usually not feasible due to budget and time constraints. Talking to individual stakeholders, on the other hand, is feasible but leads to fragmented and incomplete stakeholder scenarios. Going back and forth between different individual stakeholders to resolve this fragmentation and explore uncovered alternatives is an error-prone, time-consuming, and expensive task for the requirements engineers. While formal modeling methods can be employed to automatically check and ensure consistency of stakeholder scenarios, such methods introduce additional overhead since their formal notations have to be explained in each interaction between stakeholders and requirements engineers. 
Tangible prototypes as they are used in other disciplines such as design, on the other hand, allow designers to feasibly validate and iterate concepts and requirements with stakeholders. This thesis proposes a model-based approach for prototyping formal behavioral specifications of stakeholders who are involved in collaborative scenarios. By simulating and animating such specifications in a remote domain-specific visualization, stakeholders can experience and validate the scenarios captured so far, i.e., how other stakeholders act and react. This interactive scenario simulation is referred to as a model-based virtual prototype. Moreover, through observing how stakeholders interact with a virtual prototype of their collaborative scenarios, formal behavioral specifications can be automatically derived which complete the otherwise fragmented scenarios. This, in turn, enables requirements engineers to elicit and validate collaborative scenarios in individual stakeholder sessions - decoupled, since stakeholders can participate remotely and are not forced to be available for a joint session at the same time. This thesis discusses and evaluates the feasibility, understandability, and modifiability of model-based virtual prototypes. Similarly to how physical prototypes are perceived, the presented approach brings behavioral models closer to being tangible for stakeholders and, moreover, combines the advantages of joint stakeholder sessions and decoupled sessions.}, language = {en} } @phdthesis{Beyhl2017, author = {Beyhl, Thomas}, title = {A framework for incremental view graph maintenance}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-405929}, school = {Universit{\"a}t Potsdam}, pages = {VII, 293}, year = {2017}, abstract = {Nowadays, graph data models are employed, when relationships between entities have to be stored and are in the scope of queries. For each entity, this graph data model locally stores relationships to adjacent entities. 
Users employ graph queries to query and modify these entities and relationships. These graph queries employ graph patterns to lookup all subgraphs in the graph data that satisfy certain graph structures. These subgraphs are called graph pattern matches. However, this graph pattern matching is NP-complete for subgraph isomorphism. Thus, graph queries can suffer a long response time, when the number of entities and relationships in the graph data or the graph patterns increases. One possibility to improve the graph query performance is to employ graph views that keep ready graph pattern matches for complex graph queries for later retrieval. However, these graph views must be maintained by means of an incremental graph pattern matching to keep them consistent with the graph data from which they are derived, when the graph data changes. This maintenance adds subgraphs that satisfy a graph pattern to the graph views and removes subgraphs that do not satisfy a graph pattern anymore from the graph views. Current approaches for incremental graph pattern matching employ Rete networks. Rete networks are discrimination networks that enumerate and maintain all graph pattern matches of certain graph queries by employing a network of condition tests, which implement partial graph patterns that together constitute the overall graph query. Each condition test stores all subgraphs that satisfy the partial graph pattern. Thus, Rete networks suffer high memory consumptions, because they store a large number of partial graph pattern matches. But, especially these partial graph pattern matches enable Rete networks to update the stored graph pattern matches efficiently, because the network maintenance exploits the already stored partial graph pattern matches to find new graph pattern matches. However, other kinds of discrimination networks exist that can perform better in time and space than Rete networks. 
Currently, these other kinds of networks are not used for incremental graph pattern matching. This thesis employs generalized discrimination networks for incremental graph pattern matching. These discrimination networks permit a generalized network structure of condition tests to enable users to steer the trade-off between memory consumption and execution time for the incremental graph pattern matching. For that purpose, this thesis contributes a modeling language for the effective definition of generalized discrimination networks. Furthermore, this thesis contributes an efficient and scalable incremental maintenance algorithm, which updates the (partial) graph pattern matches that are stored by each condition test. Moreover, this thesis provides a modeling evaluation, which shows that the proposed modeling language enables the effective modeling of generalized discrimination networks. Furthermore, this thesis provides a performance evaluation, which shows that a) the incremental maintenance algorithm scales, when the graph data becomes large, and b) the generalized discrimination network structures can outperform Rete network structures in time and space at the same time for incremental graph pattern matching.}, language = {en} } @phdthesis{Boehm2013, author = {B{\"o}hm, Christoph}, title = {Enriching the Web of Data with topics and links}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-68624}, school = {Universit{\"a}t Potsdam}, year = {2013}, abstract = {This thesis presents novel ideas and research findings for the Web of Data - a global data space spanning many so-called Linked Open Data sources. Linked Open Data adheres to a set of simple principles to allow easy access and reuse for data published on the Web. Linked Open Data is by now an established concept and many (mostly academic) publishers adopted the principles building a powerful web of structured knowledge available to everybody. 
However, so far, Linked Open Data does not yet play a significant role among common web technologies that currently facilitate a high-standard Web experience. In this work, we thoroughly discuss the state-of-the-art for Linked Open Data and highlight several shortcomings - some of them we tackle in the main part of this work. First, we propose a novel type of data source meta-information, namely the topics of a dataset. This information could be published with dataset descriptions and support a variety of use cases, such as data source exploration and selection. For the topic retrieval, we present an approach coined Annotated Pattern Percolation (APP), which we evaluate with respect to topics extracted from Wikipedia portals. Second, we contribute to entity linking research by presenting an optimization model for joint entity linking, showing its hardness, and proposing three heuristics implemented in the LINked Data Alignment (LINDA) system. Our first solution can exploit multi-core machines, whereas the second and third approach are designed to run in a distributed shared-nothing environment. We discuss and evaluate the properties of our approaches leading to recommendations which algorithm to use in a specific scenario. The distributed algorithms are among the first of their kind, i.e., approaches for joint entity linking in a distributed fashion. Also, we illustrate that we can tackle the entity linking problem on the very large scale with data comprising more than 100 millions of entity representations from very many sources. Finally, we approach a sub-problem of entity linking, namely the alignment of concepts. We again target a method that looks at the data in its entirety and does not neglect existing relations. Also, this concept alignment method shall execute very fast to serve as a preprocessing for further computations. 
Our approach, called Holistic Concept Matching (HCM), achieves the required speed through grouping the input by comparing so-called knowledge representations. Within the groups, we perform complex similarity computations, relation conclusions, and detect semantic contradictions. The quality of our result is again evaluated on a large and heterogeneous dataset from the real Web. In summary, this work contributes a set of techniques for enhancing the current state of the Web of Data. All approaches have been tested on large and heterogeneous real-world input.}, language = {en} } @phdthesis{Dawoud2013, author = {Dawoud, Wesam}, title = {Scalability and performance management of internet applications in the cloud}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-68187}, school = {Universit{\"a}t Potsdam}, year = {2013}, abstract = {Cloud computing is a model for enabling on-demand access to a shared pool of computing resources. With virtually limitless on-demand resources, a cloud environment enables the hosted Internet application to quickly cope when there is an increase in the workload. However, the overhead of provisioning resources exposes the Internet application to periods of under-provisioning and performance degradation. Moreover, the performance interference, due to the consolidation in the cloud environment, complicates the performance management of the Internet applications. In this dissertation, we propose two approaches to mitigate the impact of the resources provisioning overhead. The first approach employs control theory to scale resources vertically and cope fast with workload. This approach assumes that the provider has knowledge and control over the platform running in the virtual machines (VMs), which limits it to Platform as a Service (PaaS) and Software as a Service (SaaS) providers. The second approach is a customer-side one that deals with the horizontal scalability in an Infrastructure as a Service (IaaS) model. 
It addresses the trade-off problem between cost and performance with a multi-goal optimization solution. This approach finds the scale thresholds that achieve the highest performance with the lowest increase in the cost. Moreover, the second approach employs a proposed time series forecasting algorithm to scale the application proactively and avoid under-utilization periods. Furthermore, to mitigate the interference impact on the Internet application performance, we developed a system which finds and eliminates the VMs suffering from performance interference. The developed system is a light-weight solution which does not imply provider involvement. To evaluate our approaches and the designed algorithms at large-scale level, we developed a simulator called (ScaleSim). In the simulator, we implemented scalability components acting as the scalability components of Amazon EC2. The current scalability implementation in Amazon EC2 is used as a reference point for evaluating the improvement in the scalable application performance. ScaleSim is fed with realistic models of the RUBiS benchmark extracted from the real environment. The workload is generated from the access logs of the 1998 world cup website. The results show that optimizing the scalability thresholds and adopting proactive scalability can mitigate 88\% of the resources provisioning overhead impact with only a 9\% increase in the cost.}, language = {en} } @phdthesis{EidSabbagh2015, author = {Eid-Sabbagh, Rami-Habib}, title = {Business process architectures}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-79719}, school = {Universit{\"a}t Potsdam}, pages = {xvii, 256}, year = {2015}, abstract = {Business Process Management has become an integral part of modern organizations in the private and public sector for improving their operations. 
In the course of Business Process Management efforts, companies and organizations assemble large process model repositories with many hundreds and thousands of business process models bearing a large amount of information. With the advent of large business process model collections, new challenges arise, such as structuring and managing a large amount of process models, their maintenance, and their quality assurance. This is covered by business process architectures that have been introduced for organizing and structuring business process model collections. A variety of business process architecture approaches have been proposed that align business processes along aspects of interest, e.g., goals, functions, or objects. They provide a high level categorization of single processes ignoring their interdependencies, thus hiding valuable information. The production of goods or the delivery of services are often realized by a complex system of interdependent business processes. Hence, taking a holistic view at business process interdependencies becomes a major necessity to organize, analyze, and assess the impact of their re-/design. Visualizing business process interdependencies reveals hidden and implicit information from a process model collection. In this thesis, we present a novel Business Process Architecture approach for representing and analyzing business process interdependencies on an abstract level. We propose a formal definition of our Business Process Architecture approach, design correctness criteria, and develop analysis techniques for assessing their quality. We describe a methodology for applying our Business Process Architecture approach top-down and bottom-up. This includes techniques for Business Process Architecture extraction from, and decomposition to process models while considering consistency issues between business process architecture and process model level. 
Using our extraction algorithm, we present a novel technique to identify and visualize data interdependencies in Business Process Data Architectures. Our Business Process Architecture approach provides business process experts, managers, and other users of a process model collection with an overview that allows reasoning about a large set of process models, understanding, and analyzing their interdependencies in a facilitated way. In this regard we evaluated our Business Process Architecture approach in an experiment and provide implementations of selected techniques.}, language = {en} } @phdthesis{Glander2012, author = {Glander, Tassilo}, title = {Multi-scale representations of virtual {3D} city models}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-64117}, school = {Universit{\"a}t Potsdam}, year = {2012}, abstract = {Virtual 3D city and landscape models are the main subject investigated in this thesis. They digitally represent urban space and have many applications in different domains, e.g., simulation, cadastral management, and city planning. Visualization is an elementary component of these applications. Photo-realistic visualization with an increasingly high degree of detail leads to fundamental problems for comprehensible visualization. A large number of highly detailed and textured objects within a virtual 3D city model may create visual noise and overload the users with information. Objects are subject to perspective foreshortening and may be occluded or not displayed in a meaningful way, as they are too small. In this thesis we present abstraction techniques that automatically process virtual 3D city and landscape models to derive abstracted representations. These have a reduced degree of detail, while essential characteristics are preserved. After introducing definitions for model, scale, and multi-scale representations, we discuss the fundamentals of map generalization as well as techniques for 3D generalization. 
The first presented technique is a cell-based generalization of virtual 3D city models. It creates abstract representations that have a highly reduced level of detail while maintaining essential structures, e.g., the infrastructure network, landmark buildings, and free spaces. The technique automatically partitions the input virtual 3D city model into cells based on the infrastructure network. The single building models contained in each cell are aggregated to abstracted cell blocks. Using weighted infrastructure elements, cell blocks can be computed on different hierarchical levels, storing the hierarchy relation between the cell blocks. Furthermore, we identify initial landmark buildings within a cell by comparing the properties of individual buildings with the aggregated properties of the cell. For each block, the identified landmark building models are subtracted using Boolean operations and integrated in a photo-realistic way. Finally, for the interactive 3D visualization we discuss the creation of the virtual 3D geometry and their appearance styling through colors, labeling, and transparency. We demonstrate the technique with example data sets. Additionally, we discuss applications of generalization lenses and transitions between abstract representations. The second technique is a real-time-rendering technique for geometric enhancement of landmark objects within a virtual 3D city model. Depending on the virtual camera distance, landmark objects are scaled to ensure their visibility within a specific distance interval while deforming their environment. First, in a preprocessing step a landmark hierarchy is computed, which is then used to derive distance intervals for the interactive rendering. At runtime, using the virtual camera distance, a scaling factor is computed and applied to each landmark. The scaling factor is interpolated smoothly at the interval boundaries using cubic B{\'e}zier splines. 
Non-landmark geometry that is near landmark objects is deformed with respect to a limited number of landmarks. We demonstrate the technique by applying it to a highly detailed virtual 3D city model and a generalized 3D city model. In addition we discuss an adaptation of the technique for non-linear projections and mobile devices. The third technique is a real-time rendering technique to create abstract 3D isocontour visualization of virtual 3D terrain models. The virtual 3D terrain model is visualized as a layered or stepped relief. The technique works without preprocessing and, as it is implemented using programmable graphics hardware, can be integrated with minimal changes into common terrain rendering techniques. Consequently, the computation is done in the rendering pipeline for each vertex, primitive, i.e., triangle, and fragment. For each vertex, the height is quantized to the nearest isovalue. For each triangle, the vertex configuration with respect to their isovalues is determined first. Using the configuration, the triangle is then subdivided. The subdivision forms a partial step geometry aligned with the triangle. For each fragment, the surface appearance is determined, e.g., depending on the surface texture, shading, and height-color-mapping. Flexible usage of the technique is demonstrated with applications from focus+context visualization, out-of-core terrain rendering, and information visualization. This thesis presents components for the creation of abstract representations of virtual 3D city and landscape models. Re-using visual language from cartography, the techniques enable users to build on their experience with maps when interpreting these representations. 
Simultaneously, characteristics of 3D geovirtual environments are taken into account by addressing and discussing, e.g., continuous scale, interaction, and perspective.},
  language = {en}
}

@phdthesis{Gumienny2013,
  author   = {Gumienny, Raja Carola},
  title    = {Understanding the adoption of digital whiteboard systems for collaborative design work},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-72417},
  school   = {Universit{\"a}t Potsdam},
  year     = {2013},
  abstract = {User-centered design processes are the first choice when new interactive systems or services are developed to address real customer needs and provide a good user experience. Common tools for collecting user research data, conducting brainstormings, or sketching ideas are whiteboards and sticky notes. They are ubiquitously available, and no technical or domain knowledge is necessary to use them. However, traditional pen and paper tools fall short when saving the content and sharing it with others unable to be in the same location. They are also missing further digital advantages such as searching or sorting content. Although research on digital whiteboard and sticky note applications has been conducted for over 20 years, these tools are not widely adopted in company contexts. While many research prototypes exist, they have not been used for an extended period of time in a real-world context. The goal of this thesis is to investigate what the enablers and obstacles for the adoption of digital whiteboard systems are. As an instrument for different studies, we developed the Tele-Board software system for collaborative creative work. Based on interviews, observations, and findings from former research, we tried to transfer the analog way of working to the digital world. Being a software system, Tele-Board can be used with a variety of hardware and does not depend on special devices. This feature became one of the main factors for adoption on a larger scale. 
In this thesis, I will present three studies on the use of Tele-Board with different user groups and foci. I will use a combination of research methods (laboratory case studies and data from field research) with the overall goal of finding out when a digital whiteboard system is used and in which cases not. Not surprisingly, the system is used and accepted if a user sees a main benefit that neither analog tools nor other applications can offer. However, I found that these perceived benefits are very different for each user and usage context. If a tool provides possibilities to use in different ways and with different equipment, the chances of its adoption by a larger group increase. Tele-Board has now been in use for over 1.5 years in a global IT company in at least five countries with a constantly growing user base. Its use, advantages, and disadvantages will be described based on 42 interviews and usage statistics from server logs. Through these insights and findings from laboratory case studies, I will present a detailed analysis of digital whiteboard use in different contexts with design implications for future systems.},
  language = {en}
}

@comment{Gustafson2013: Interfaces in the title is brace-protected because the abstract uses Imaginary Interfaces as a capitalized proper name; sentence-casing styles would otherwise lowercase it.}
@phdthesis{Gustafson2013,
  author   = {Gustafson, Sean},
  title    = {Imaginary {Interfaces}},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-68960},
  school   = {Universit{\"a}t Potsdam},
  year     = {2013},
  abstract = {The size of a mobile device is primarily determined by the size of the touchscreen. As such, researchers have found that the way to achieve ultimate mobility is to abandon the screen altogether. These wearable devices are operated using hand gestures, voice commands or a small number of physical buttons. By abandoning the screen these devices also abandon the currently dominant spatial interaction style (such as tapping on buttons), because, seemingly, there is nothing to tap on. 
Unfortunately this design prevents users from transferring their learned interaction knowledge gained from traditional touchscreen-based devices. In this dissertation, I present Imaginary Interfaces, which return spatial interaction to screenless mobile devices. With these interfaces, users point and draw in the empty space in front of them or on the palm of their hands. While they cannot see the results of their interaction, they obtain some visual and tactile feedback by watching and feeling their hands interact. After introducing the concept of Imaginary Interfaces, I present two hardware prototypes that showcase two different forms of interaction with an imaginary interface, each with its own advantages: mid-air imaginary interfaces can be large and expressive, while palm-based imaginary interfaces offer an abundance of tactile features that encourage learning. Given that imaginary interfaces offer no visual output, one of the key challenges is to enable users to discover the interface's layout. This dissertation offers three main solutions: offline learning with coordinates, browsing with audio feedback and learning by transfer. The latter I demonstrate with the Imaginary Phone, a palm-based imaginary interface that mimics the layout of a physical mobile phone that users are already familiar with. Although these designs enable interaction with Imaginary Interfaces, they tell us little about why this interaction is possible. In the final part of this dissertation, I present an exploration into which human perceptual abilities are used when interacting with a palm-based imaginary interface and how much each accounts for performance with the interface. 
These findings deepen our understanding of Imaginary Interfaces and suggest that palm-based Imaginary Interfaces can enable stand-alone eyes-free use for many applications, including interfaces for visually impaired users.},
  language = {en}
}

@phdthesis{Hebig2014,
  author   = {Hebig, Regina},
  title    = {Evolution of model-driven engineering settings in practice},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-70761},
  school   = {Universit{\"a}t Potsdam},
  year     = {2014},
  abstract = {Nowadays, software systems are getting more and more complex. To tackle this challenge most diverse techniques, such as design patterns, service oriented architectures (SOA), software development processes, and model-driven engineering (MDE), are used to improve productivity, while time to market and quality of the products stay stable. Multiple of these techniques are used in parallel to profit from their benefits. While the use of sophisticated software development processes is standard, today, MDE is just adopted in practice. However, research has shown that the application of MDE is not always successful. It is not fully understood when advantages of MDE can be used and to what degree MDE can also be disadvantageous for productivity. Further, when combining different techniques that aim to affect the same factor (e.g. productivity) the question arises whether these techniques really complement each other or, in contrast, compensate their effects. Due to that, there is the concrete question how MDE and other techniques, such as software development process, are interrelated. Both aspects (advantages and disadvantages for productivity as well as the interrelation to other techniques) need to be understood to identify risks relating to the productivity impact of MDE. Before studying MDE's impact on productivity, it is necessary to investigate the range of validity that can be reached for the results. This includes two questions. 
First, there is the question whether MDE's impact on productivity is similar for all approaches of adopting MDE in practice. Second, there is the question whether MDE's impact on productivity for an approach of using MDE in practice remains stable over time. The answers for both questions are crucial for handling risks of MDE, but also for the design of future studies on MDE success. This thesis addresses these questions with the goal to support adoption of MDE in future. To enable a differentiated discussion about MDE, the term ``MDE setting'' is introduced. MDE setting refers to the applied technical setting, i.e. the employed manual and automated activities, artifacts, languages, and tools. An MDE setting's possible impact on productivity is studied with a focus on changeability and the interrelation to software development processes. This is done by introducing a taxonomy of changeability concerns that might be affected by an MDE setting. Further, three MDE traits are identified and it is studied for which manifestations of these MDE traits software development processes are impacted. To enable the assessment and evaluation of an MDE setting's impacts, the Software Manufacture Model language is introduced. This is a process modeling language that allows to reason about how relations between (modeling) artifacts (e.g. models or code files) change during application of manual or automated development activities. On that basis, risk analysis techniques are provided. These techniques allow identifying changeability risks and assessing the manifestations of the MDE traits (and with it an MDE setting's impact on software development processes). To address the range of validity, MDE settings from practice and their evolution histories were captured in context of this thesis. First, this data is used to show that MDE settings cover the whole spectrum concerning their impact on changeability or interrelation to software development processes. 
Neither it is seldom that MDE settings are neutral for processes nor is it seldom that MDE settings have impact on processes. Similarly, the impact on changeability differs relevantly. Second, a taxonomy of evolution of MDE settings is introduced. In that context it is discussed to what extent different types of changes on an MDE setting can influence this MDE setting's impact on changeability and the interrelation to processes. The category of structural evolution, which can change these characteristics of an MDE setting, is identified. The captured MDE settings from practice are used to show that structural evolution exists and is common. In addition, some examples of structural evolution steps are collected that actually led to a change in the characteristics of the respective MDE settings. Two implications are: First, the assessed diversity of MDE settings evaluates the need for the analysis techniques that shall be presented in this thesis. Second, evolution is one explanation for the diversity of MDE settings in practice. To summarize, this thesis studies the nature and evolution of MDE settings in practice. As a result support for the adoption of MDE settings is provided in form of techniques for the identification of risks relating to productivity impacts.},
  language = {en}
}

@phdthesis{Heise2014,
  author   = {Heise, Arvid},
  title    = {Data cleansing and integration operators for a parallel data analytics platform},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-77100},
  school   = {Universit{\"a}t Potsdam},
  pages    = {ii, 179},
  year     = {2014},
  abstract = {The data quality of real-world datasets need to be constantly monitored and maintained to allow organizations and individuals to reliably use their data. Especially, data integration projects suffer from poor initial data quality and as a consequence consume more effort and money. 
Commercial products and research prototypes for data cleansing and integration help users to improve the quality of individual and combined datasets. They can be divided into either standalone systems or database management system (DBMS) extensions. On the one hand, standalone systems do not interact well with DBMS and require time-consuming data imports and exports. On the other hand, DBMS extensions are often limited by the underlying system and do not cover the full set of data cleansing and integration tasks. We overcome both limitations by implementing a concise set of five data cleansing and integration operators on the parallel data analytics platform Stratosphere. We define the semantics of the operators, present their parallel implementation, and devise optimization techniques for individual operators and combinations thereof. Users specify declarative queries in our query language METEOR with our new operators to improve the data quality of individual datasets or integrate them to larger datasets. By integrating the data cleansing operators into the higher level language layer of Stratosphere, users can easily combine cleansing operators with operators from other domains, such as information extraction, to complex data flows. Through a generic description of the operators, the Stratosphere optimizer reorders operators even from different domains to find better query plans. As a case study, we reimplemented a part of the large Open Government Data integration project GovWILD with our new operators and show that our queries run significantly faster than the original GovWILD queries, which rely on relational operators. Evaluation reveals that our operators exhibit good scalability on up to 100 cores, so that even larger inputs can be efficiently processed by scaling out to more machines. 
Finally, our scripts are considerably shorter than the original GovWILD scripts, which results in better maintainability of the scripts.},
  language = {en}
}

@comment{Holz2013: 3D and 2D in the title are brace-protected so that sentence-casing styles keep them uppercase; quotations in the abstract use LaTeX opening and closing quote pairs.}
@phdthesis{Holz2013,
  author   = {Holz, Christian},
  title    = {{3D} from {2D} touch},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-67796},
  school   = {Universit{\"a}t Potsdam},
  year     = {2013},
  abstract = {While interaction with computers used to be dominated by mice and keyboards, new types of sensors now allow users to interact through touch, speech, or using their whole body in 3D space. These new interaction modalities are often referred to as ``natural user interfaces'' or ``NUIs.'' While 2D NUIs have experienced major success on billions of mobile touch devices sold, 3D NUI systems have so far been unable to deliver a mobile form factor, mainly due to their use of cameras. The fact that cameras require a certain distance from the capture volume has prevented 3D NUI systems from reaching the flat form factor mobile users expect. In this dissertation, we address this issue by sensing 3D input using flat 2D sensors. The systems we present observe the input from 3D objects as 2D imprints upon physical contact. By sampling these imprints at very high resolutions, we obtain the objects' textures. In some cases, a texture uniquely identifies a biometric feature, such as the user's fingerprint. In other cases, an imprint stems from the user's clothing, such as when walking on multitouch floors. By analyzing from which part of the 3D object the 2D imprint results, we reconstruct the object's pose in 3D space. While our main contribution is a general approach to sensing 3D input on 2D sensors upon physical contact, we also demonstrate three applications of our approach. (1) We present high-accuracy touch devices that allow users to reliably touch targets that are a third of the size of those on current touch devices. 
We show that different users and 3D finger poses systematically affect touch sensing, which current devices perceive as random input noise. We introduce a model for touch that compensates for this systematic effect by deriving the 3D finger pose and the user's identity from each touch imprint. We then investigate this systematic effect in detail and explore how users conceptually touch targets. Our findings indicate that users aim by aligning visual features of their fingers with the target. We present a visual model for touch input that eliminates virtually all systematic effects on touch accuracy. (2) From each touch, we identify users biometrically by analyzing their fingerprints. Our prototype Fiberio integrates fingerprint scanning and a display into the same flat surface, solving a long-standing problem in human-computer interaction: secure authentication on touchscreens. Sensing 3D input and authenticating users upon touch allows Fiberio to implement a variety of applications that traditionally require the bulky setups of current 3D NUI systems. (3) To demonstrate the versatility of 3D reconstruction on larger touch surfaces, we present a high-resolution pressure-sensitive floor that resolves the texture of objects upon touch. Using the same principles as before, our system GravitySpace analyzes all imprints and identifies users based on their shoe soles, detects furniture, and enables accurate touch input using feet. By classifying all imprints, GravitySpace detects the users' body parts that are in contact with the floor and then reconstructs their 3D body poses using inverse kinematics. GravitySpace thus enables a range of applications for future 3D NUI systems based on a flat sensor, such as smart rooms in future homes. We conclude this dissertation by projecting into the future of mobile devices. 
Focusing on the mobility aspect of our work, we explore how NUI devices may one day augment users directly in the form of implanted devices.},
  language = {en}
}

@comment{Kluth2011: the acronym FMC-QE in the title is brace-protected so that sentence-casing styles keep it uppercase; hyphenation artifacts left over from text extraction were removed from the abstract.}
@phdthesis{Kluth2011,
  author   = {Kluth, Stephan},
  title    = {Quantitative modeling and analysis with {FMC-QE}},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-52987},
  school   = {Universit{\"a}t Potsdam},
  year     = {2011},
  abstract = {The modeling and evaluation calculus FMC-QE, the Fundamental Modeling Concepts for Quantitative Evaluation [1], extends the Fundamental Modeling Concepts (FMC) for performance modeling and prediction. In this new methodology, the hierarchical service requests are in the main focus, because they are the origin of every service provisioning process. Similar to physics, these service requests are a tuple of value and unit, which enables hierarchical service request transformations at the hierarchical borders and therefore the hierarchical modeling. Through reducing the model complexity of the models by decomposing the system in different hierarchical views, the distinction between operational and control states and the calculation of the performance values on the assumption of the steady state, FMC-QE has a scalable applicability on complex systems. According to FMC, the system is modeled in a 3-dimensional hierarchical representation space, where system performance parameters are described in three arbitrarily fine-grained hierarchical bipartite diagrams. The hierarchical service request structures are modeled in Entity Relationship Diagrams. The static server structures, divided into logical and real servers, are described as Block Diagrams. The dynamic behavior and the control structures are specified as Petri Nets, more precisely Colored Time Augmented Petri Nets. From the structures and parameters of the performance model, a hierarchical set of equations is derived. 
The calculation of the performance values is done on the assumption of stationary processes and is based on fundamental laws of the performance analysis: Little's Law and the Forced Traffic Flow Law. Little's Law is used within the different hierarchical levels (horizontal) and the Forced Traffic Flow Law is the key to the dependencies among the hierarchical levels (vertical). This calculation is suitable for complex models and allows a fast (re-)calculation of different performance scenarios in order to support development and configuration decisions. Within the Research Group Zorn at the Hasso Plattner Institute, the work is embedded in a broader research in the development of FMC-QE. While this work is concentrated on the theoretical background, description and definition of the methodology as well as the extension and validation of the applicability, other topics are in the development of an FMC-QE modeling and evaluation tool and the usage of FMC-QE in the design of an adaptive transport layer in order to fulfill Quality of Service and Service Level Agreements in volatile service based environments. This thesis contains a state-of-the-art, the description of FMC-QE as well as extensions of FMC-QE in representative general models and case studies. In the state-of-the-art part of the thesis in chapter 2, an overview on existing Queueing Theory and Time Augmented Petri Net models and other quantitative modeling and evaluation languages and methodologies is given. Also other hierarchical quantitative modeling frameworks will be considered. The description of FMC-QE in chapter 3 consists of a summary of the foundations of FMC-QE, basic definitions, the graphical notations, the FMC-QE Calculus and the modeling of open queueing networks as an introductory example. The extensions of FMC-QE in chapter 4 consist of the integration of the summation method in order to support the handling of closed networks and the modeling of multiclass and semaphore scenarios. 
Furthermore, FMC-QE is compared to other performance modeling and evaluation approaches. In the case study part in chapter 5, proof-of-concept examples, like the modeling of a service based search portal, a service based SAP NetWeaver application and the Axis2 Web service framework will be provided. Finally, conclusions are given by a summary of contributions and an outlook on future work in chapter 6. [1] Werner Zorn. FMC-QE - A New Approach in Quantitative Modeling. In Hamid R. Arabnia, editor, Proceedings of the International Conference on Modeling, Simulation and Visualization Methods (MSV 2007) within WorldComp '07, pages 280 - 287, Las Vegas, NV, USA, June 2007. CSREA Press. ISBN 1-60132-029-9.},
  language = {en}
}

@phdthesis{Kunze2013,
  author   = {Kunze, Matthias},
  title    = {Searching business process models by example},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-68844},
  school   = {Universit{\"a}t Potsdam},
  year     = {2013},
  abstract = {Business processes are fundamental to the operations of a company. Each product manufactured and every service provided is the result of a series of actions that constitute a business process. Business process management is an organizational principle that makes the processes of a company explicit and offers capabilities to implement procedures, control their execution, analyze their performance, and improve them. Therefore, business processes are documented as process models that capture these actions and their execution ordering, and make them accessible to stakeholders. As these models are an essential knowledge asset, they need to be managed effectively. In particular, the discovery and reuse of existing knowledge becomes challenging in the light of companies maintaining hundreds and thousands of process models. In practice, searching process models has been solved only superficially by means of free-text search of process names and their descriptions. 
Scientific contributions are limited in their scope, as they either present measures for process similarity or elaborate on query languages to search for particular aspects. However, they fall short in addressing efficient search, the presentation of search results, and the support to reuse discovered models. This thesis presents a novel search method, where a query is expressed by an exemplary business process model that describes the behavior of a possible answer. This method builds upon a formal framework that captures and compares the behavior of process models by the execution ordering of actions. The framework contributes a conceptual notion of behavioral distance that quantifies commonalities and differences of a pair of process models, and enables process model search. Based on behavioral distances, a set of measures is proposed that evaluate the quality of a particular search result to guide the user in assessing the returned matches. A projection of behavioral aspects to a process model enables highlighting relevant fragments that led to a match and facilitates its reuse. The thesis further elaborates on two search techniques that provide concrete behavioral distance functions as an instantiation of the formal framework. Querying enables search with a notion of behavioral inclusion with regard to the query. In contrast, similarity search obtains process models that are similar to a query, even if the query is not precisely matched. For both techniques, indexes are presented that enable efficient search. Methods to evaluate the quality and performance of process model search are introduced and applied to the techniques of this thesis. 
They show good results with regard to human assessment and scalability in a practical setting.},
  language = {en}
}

@phdthesis{Kyprianidis2013,
  author   = {Kyprianidis, Jan Eric},
  title    = {Structure adaptive stylization of images and video},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-64104},
  school   = {Universit{\"a}t Potsdam},
  year     = {2013},
  abstract = {In the early days of computer graphics, research was mainly driven by the goal to create realistic synthetic imagery. By contrast, non-photorealistic computer graphics, established as its own branch of computer graphics in the early 1990s, is mainly motivated by concepts and principles found in traditional art forms, such as painting, illustration, and graphic design, and it investigates concepts and techniques that abstract from reality using expressive, stylized, or illustrative rendering techniques. This thesis focuses on the artistic stylization of two-dimensional content and presents several novel automatic techniques for the creation of simplified stylistic illustrations from color images, video, and 3D renderings. Primary innovation of these novel techniques is that they utilize the smooth structure tensor as a simple and efficient way to obtain information about the local structure of an image. More specifically, this thesis contributes to knowledge in this field in the following ways. First, a comprehensive review of the structure tensor is provided. In particular, different methods for integrating the minor eigenvector field of the smoothed structure tensor are developed, and the superiority of the smoothed structure tensor over the popular edge tangent flow is demonstrated. Second, separable implementations of the popular bilateral and difference of Gaussians filters that adapt to the local structure are presented. These filters avoid artifacts while being computationally highly efficient. Taken together, both provide an effective way to create a cartoon-style effect. 
Third, a generalization of the Kuwahara filter is presented that avoids artifacts by adapting the shape, scale, and orientation of the filter to the local structure. This causes directional image features to be better preserved and emphasized, resulting in overall sharper edges and a more feature-abiding painterly effect. In addition to the single-scale variant, a multi-scale variant is presented, which is capable of performing a highly aggressive abstraction. Fourth, a technique that builds upon the idea of combining flow-guided smoothing with shock filtering is presented, allowing for an aggressive exaggeration and an emphasis of directional image features. All presented techniques are suitable for temporally coherent per-frame filtering of video or dynamic 3D renderings, without requiring expensive extra processing, such as optical flow. Moreover, they can be efficiently implemented to process content in real-time on a GPU.},
  language = {en}
}

@phdthesis{Lange2013,
  author   = {Lange, Dustin},
  title    = {Effective and efficient similarity search in databases},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-65712},
  school   = {Universit{\"a}t Potsdam},
  year     = {2013},
  abstract = {Given a large set of records in a database and a query record, similarity search aims to find all records sufficiently similar to the query record. To solve this problem, two main aspects need to be considered: First, to perform effective search, the set of relevant records is defined using a similarity measure. Second, an efficient access method is to be found that performs only few database accesses and comparisons using the similarity measure. This thesis solves both aspects with an emphasis on the latter. In the first part of this thesis, a frequency-aware similarity measure is introduced. Compared record pairs are partitioned according to frequencies of attribute values. 
For each partition, a different similarity measure is created: machine learning techniques combine a set of base similarity measures into an overall similarity measure. After that, a similarity index for string attributes is proposed, the State Set Index (SSI), which is based on a trie (prefix tree) that is interpreted as a nondeterministic finite automaton. For processing range queries, the notion of query plans is introduced in this thesis to describe which similarity indexes to access and which thresholds to apply. The query result should be as complete as possible under some cost threshold. Two query planning variants are introduced: (1) Static planning selects a plan at compile time that is used for all queries. (2) Query-specific planning selects a different plan for each query. For answering top-k queries, the Bulk Sorted Access Algorithm (BSA) is introduced, which retrieves large chunks of records from the similarity indexes using fixed thresholds, and which focuses its efforts on records that are ranked high in more than one attribute and thus promising candidates. The described components form a complete similarity search system. Based on prototypical implementations, this thesis shows comparative evaluation results for all proposed approaches on different real-world data sets, one of which is a large person data set from a German credit rating agency.},
  language = {en}
}

@comment{Lorey2014: main title and subtitle are joined with a plain colon, without the space that catalogue exports put before it.}
@phdthesis{Lorey2014,
  author   = {Lorey, Johannes},
  title    = {What's in a query: analyzing, predicting, and managing linked data access},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-72312},
  school   = {Universit{\"a}t Potsdam},
  year     = {2014},
  abstract = {The term Linked Data refers to connected information sources comprising structured data about a wide range of topics and for a multitude of applications. In recent years, the conceptional and technical foundations of Linked Data have been formalized and refined. 
To this end, well-known technologies have been established, such as the Resource Description Framework (RDF) as a Linked Data model or the SPARQL Protocol and RDF Query Language (SPARQL) for retrieving this information. Whereas most research has been conducted in the area of generating and publishing Linked Data, this thesis presents novel approaches for improved management. In particular, we illustrate new methods for analyzing and processing SPARQL queries. Here, we present two algorithms suitable for identifying structural relationships between these queries. Both algorithms are applied to a large number of real-world requests to evaluate the performance of the approaches and the quality of their results. Based on this, we introduce different strategies enabling optimized access of Linked Data sources. We demonstrate how the presented approach facilitates effective utilization of SPARQL endpoints by prefetching results relevant for multiple subsequent requests. Furthermore, we contribute a set of metrics for determining technical characteristics of such knowledge bases. To this end, we devise practical heuristics and validate them through thorough analysis of real-world data sources. We discuss the findings and evaluate their impact on utilizing the endpoints. Moreover, we detail the adoption of a scalable infrastructure for improving Linked Data discovery and consumption. 
As we outline in an exemplary use case, this platform is eligible both for processing and provisioning the corresponding information.}, language = {en} } @phdthesis{Menzel2011, author = {Menzel, Michael}, title = {Model-driven security in service-oriented architectures : leveraging security patterns to transform high-level security requirements to technical policies}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-59058}, school = {Universit{\"a}t Potsdam}, year = {2011}, abstract = {Service-oriented Architectures (SOA) facilitate the provision and orchestration of business services to enable a faster adaptation to changing business demands. Web Services provide a technical foundation to implement this paradigm on the basis of XML-messaging. However, the enhanced flexibility of message-based systems comes along with new threats and risks. To face these issues, a variety of security mechanisms and approaches is supported by the Web Service specifications. The usage of these security mechanisms and protocols is configured by stating security requirements in security policies. However, security policy languages for SOA are complex and difficult to create due to the expressiveness of these languages. To facilitate and simplify the creation of security policies, this thesis presents a model-driven approach that enables the generation of complex security policies on the basis of simple security intentions. SOA architects can specify these intentions in system design models and are not required to deal with complex technical security concepts. The approach introduced in this thesis enables the enhancement of any system design modelling language - for example FMC or BPMN - with security modelling elements. The syntax, semantics, and notion of these elements are defined by our security modelling language SecureSOA. The metamodel of this language provides extension points to enable the integration into system design modelling languages.
In particular, this thesis demonstrates the enhancement of FMC block diagrams with SecureSOA. To enable the model-driven generation of security policies, a domain-independent policy model is introduced in this thesis. This model provides an abstraction layer for security policies. Mappings are used to perform the transformation from our model to security policy languages. However, expert knowledge is required to generate instances of this model on the basis of simple security intentions. Appropriate security mechanisms, protocols and options must be chosen and combined to fulfil these security intentions. In this thesis, a formalised system of security patterns is used to represent this knowledge and to enable an automated transformation process. Moreover, a domain-specific language is introduced to state security patterns in an accessible way. On the basis of this language, a system of security configuration patterns is provided to transform security intentions related to data protection and identity management. The formal semantics of the security pattern language enable the verification of the transformation process introduced in this thesis and prove the correctness of the pattern application. Finally, our SOA Security LAB is presented that demonstrates the application of our model-driven approach to facilitate a dynamic creation, configuration, and execution of secure Web Service-based composed applications.}, language = {en} } @phdthesis{Meyer2015, author = {Meyer, Andreas}, title = {Data perspective in business process management}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-84806}, school = {Universit{\"a}t Potsdam}, pages = {xxi, 362}, year = {2015}, abstract = {Gesch{\"a}ftsprozessmanagement ist ein strukturierter Ansatz zur Modellierung, Analyse, Steuerung und Ausf{\"u}hrung von Gesch{\"a}ftsprozessen, um Gesch{\"a}ftsziele zu erreichen. 
Es st{\"u}tzt sich dabei auf konzeptionelle Modelle, von denen Prozessmodelle am weitesten verbreitet sind. Prozessmodelle beschreiben wer welche Aufgabe auszuf{\"u}hren hat, um das Gesch{\"a}ftsziel zu erreichen, und welche Informationen daf{\"u}r ben{\"o}tigt werden. Damit beinhalten Prozessmodelle Informationen {\"u}ber den Kontrollfluss, die Zuweisung von Verantwortlichkeiten, den Datenfluss und Informationssysteme. Die Automatisierung von Gesch{\"a}ftsprozessen erh{\"o}ht die Effizienz der Arbeitserledigung und wird durch Process Engines unterst{\"u}tzt. Daf{\"u}r werden jedoch Informationen {\"u}ber den Kontrollfluss, die Zuweisung von Verantwortlichkeiten f{\"u}r Aufgaben und den Datenfluss ben{\"o}tigt. W{\"a}hrend aktuelle Process Engines die ersten beiden Informationen weitgehend automatisiert verarbeiten k{\"o}nnen, m{\"u}ssen Daten manuell implementiert und gewartet werden. Dem entgegen verspricht ein modell-getriebenes Behandeln von Daten eine vereinfachte Implementation in der Process Engine und verringert gleichzeitig die Fehleranf{\"a}lligkeit dank einer graphischen Visualisierung und reduziert den Entwicklungsaufwand durch Codegenerierung. Die vorliegende Dissertation besch{\"a}ftigt sich mit der Modellierung, der Analyse und der Ausf{\"u}hrung von Daten in Gesch{\"a}ftsprozessen. Als formale Basis f{\"u}r die Prozessausf{\"u}hrung wird ein konzeptuelles Framework f{\"u}r die Integration von Prozessen und Daten eingef{\"u}hrt. Dieses Framework wird durch operationelle Semantik erg{\"a}nzt, die mittels einem um Daten erweiterten Petrinetz-Mapping vorgestellt wird. Die modellgetriebene Ausf{\"u}hrung von Daten muss komplexe Datenabh{\"a}ngigkeiten, Prozessdaten und den Datenaustausch ber{\"u}cksichtigen. Letzterer tritt bei der Kommunikation zwischen mehreren Prozessteilnehmern auf. 
Diese Arbeit nutzt Konzepte aus dem Bereich der Datenbanken und {\"u}berf{\"u}hrt diese ins Gesch{\"a}ftsprozessmanagement, um Datenoperationen zu unterscheiden, um Abh{\"a}ngigkeiten zwischen Datenobjekten des gleichen und verschiedenen Typs zu spezifizieren, um modellierte Datenknoten sowie empfangene Nachrichten zur richtigen laufenden Prozessinstanz zu korrelieren und um Nachrichten f{\"u}r die Prozess{\"u}bergreifende Kommunikation zu generieren. Der entsprechende Ansatz ist nicht auf eine bestimmte Prozessbeschreibungssprache begrenzt und wurde prototypisch implementiert. Die Automatisierung der Datenbehandlung in Gesch{\"a}ftsprozessen erfordert entsprechend annotierte und korrekte Prozessmodelle. Als Unterst{\"u}tzung zur Datenannotierung f{\"u}hrt diese Arbeit einen Algorithmus ein, welcher Informationen {\"u}ber Datenknoten, deren Zust{\"a}nde und Datenabh{\"a}ngigkeiten aus Kontrollflussinformationen extrahiert und die Prozessmodelle entsprechend annotiert. Allerdings k{\"o}nnen gew{\"o}hnlich nicht alle erforderlichen Informationen aus Kontrollflussinformationen extrahiert werden, da detaillierte Angaben {\"u}ber m{\"o}gliche Datenmanipulationen fehlen. Deshalb sind weitere Prozessmodellverfeinerungen notwendig. Basierend auf einer Menge von Objektlebenszyklen kann ein Prozessmodell derart verfeinert werden, dass die in den Objektlebenszyklen spezifizierten Datenmanipulationen automatisiert in ein Prozessmodell {\"u}berf{\"u}hrt werden k{\"o}nnen. Prozessmodelle stellen eine Abstraktion dar. Somit fokussieren sie auf verschiedene Teilbereiche und stellen diese im Detail dar. Solche Detailbereiche sind beispielsweise die Kontrollflusssicht und die Datenflusssicht, welche oft durch Aktivit{\"a}ts-zentrierte beziehungsweise Objekt-zentrierte Prozessmodelle abgebildet werden. In der vorliegenden Arbeit werden Algorithmen zur Transformation zwischen diesen Sichten beschrieben. 
Zur Sicherstellung der Modellkorrektheit wird das Konzept der ``weak conformance'' zur {\"U}berpr{\"u}fung der Konsistenz zwischen Objektlebenszyklen und dem Prozessmodell eingef{\"u}hrt. Dabei darf das Prozessmodell nur Datenmanipulationen enthalten, die auch in einem Objektlebenszyklus spezifiziert sind. Die Korrektheit wird mittels Soundness-{\"U}berpr{\"u}fung einer hybriden Darstellung ermittelt, so dass Kontrollfluss- und Datenkorrektheit integriert {\"u}berpr{\"u}ft werden. Um eine korrekte Ausf{\"u}hrung des Prozessmodells zu gew{\"a}hrleisten, m{\"u}ssen gefundene Inkonsistenzen korrigiert werden. Daf{\"u}r werden f{\"u}r jede Inkonsistenz alternative Vorschl{\"a}ge zur Modelladaption identifiziert und vorgeschlagen. Zusammengefasst, unter Einsatz der Ergebnisse dieser Dissertation k{\"o}nnen Gesch{\"a}ftsprozesse modellgetrieben ausgef{\"u}hrt werden unter Ber{\"u}cksichtigung sowohl von Daten als auch den zuvor bereits unterst{\"u}tzten Perspektiven bez{\"u}glich Kontrollfluss und Verantwortlichkeiten. Dabei wird die Modellerstellung teilweise mit automatisierten Algorithmen unterst{\"u}tzt und die Modellkonsistenz durch Datenkorrektheits{\"u}berpr{\"u}fungen gew{\"a}hrleistet.}, language = {en} } @phdthesis{Mueller2016, author = {Mueller, Stefanie}, title = {Interacting with personal fabrication devices}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-100908}, school = {Universit{\"a}t Potsdam}, pages = {xxi, 108}, year = {2016}, abstract = {Personal fabrication tools, such as 3D printers, are on the way of enabling a future in which non-technical users will be able to create custom objects. However, while the hardware is there, the current interaction model behind existing design tools is not suitable for non-technical users. Today, 3D printers are operated by fabricating the object in one go, which tends to take overnight due to the slow 3D printing technology.
Consequently, the current interaction model requires users to think carefully before printing as every mistake may imply another overnight print. Planning every step ahead, however, is not feasible for non-technical users as they lack the experience to reason about the consequences of their design decisions. In this dissertation, we propose changing the interaction model around personal fabrication tools to better serve this user group. We draw inspiration from personal computing and argue that the evolution of personal fabrication may resemble the evolution of personal computing: Computing started with machines that executed a program in one go before returning the result to the user. By decreasing the interaction unit to single requests, turn-taking systems such as the command line evolved, which provided users with feedback after every input. Finally, with the introduction of direct-manipulation interfaces, users continuously interacted with a program receiving feedback about every action in real-time. In this dissertation, we explore whether these interaction concepts can be applied to personal fabrication as well. We start with fabricating an object in one go and investigate how to tighten the feedback-cycle on an object-level: We contribute a method called low-fidelity fabrication, which saves up to 90\% fabrication time by creating objects as fast low-fidelity previews, which are sufficient to evaluate key design aspects. Depending on what is currently being tested, we propose different conversions that enable users to focus on different parts: faBrickator allows for a modular design in the early stages of prototyping; when users move on WirePrint allows quickly testing an object's shape, while Platener allows testing an object's technical function. We present an interactive editor for each technique and explain the underlying conversion algorithms. 
By interacting on smaller units, such as a single element of an object, we explore what it means to transition from systems that fabricate objects in one go to turn-taking systems. We start with a 2D system called constructable: Users draw with a laser pointer onto the workpiece inside a laser cutter. The drawing is captured with an overhead camera. As soon as the user finishes drawing an element, such as a line, the constructable system beautifies the path and cuts it--resulting in physical output after every editing step. We extend constructable towards 3D editing by developing a novel laser-cutting technique for 3D objects called LaserOrigami that works by heating up the workpiece with the defocused laser until the material becomes compliant and bends down under gravity. While constructable and LaserOrigami allow for fast physical feedback, the interaction is still best described as turn-taking since it consists of two discrete steps: users first create an input and afterwards the system provides physical output. By decreasing the interaction unit even further to a single feature, we can achieve real-time physical feedback: Input by the user and output by the fabrication device are so tightly coupled that no visible lag exists. This allows us to explore what it means to transition from turn-taking interfaces, which only allow exploring one option at a time, to direct manipulation interfaces with real-time physical feedback, which allow users to explore the entire space of options continuously with a single interaction. We present a system called FormFab, which allows for such direct control. FormFab is based on the same principle as LaserOrigami: It uses a workpiece that when warmed up becomes compliant and can be reshaped. However, FormFab achieves the reshaping not based on gravity, but through a pneumatic system that users can control interactively. As users interact, they see the shape change in real-time.
We conclude this dissertation by extrapolating the current evolution into a future in which large numbers of people use the new technology to create objects. We see two additional challenges on the horizon: sustainability and intellectual property. We investigate sustainability by demonstrating how to print less and instead patch physical objects. We explore questions around intellectual property with a system called Scotty that transfers objects without creating duplicates, thereby preserving the designer's copyright.}, language = {en} } @phdthesis{Perscheid2013, author = {Perscheid, Michael}, title = {Test-driven fault navigation for debugging reproducible failures}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-68155}, school = {Universit{\"a}t Potsdam}, year = {2013}, abstract = {The correction of software failures tends to be very cost-intensive because their debugging is an often time-consuming development activity. During this activity, developers largely attempt to understand what causes failures: Starting with a test case that reproduces the observable failure they have to follow failure causes on the infection chain back to the root cause (defect). This idealized procedure requires deep knowledge of the system and its behavior because failures and defects can be far apart from each other. Unfortunately, common debugging tools are inadequate for systematically investigating such infection chains in detail. Thus, developers have to rely primarily on their intuition and the localization of failure causes is not time-efficient. To prevent debugging by disorganized trial and error, experienced developers apply the scientific method and its systematic hypothesis-testing. However, even when using the scientific method, the search for failure causes can still be a laborious task. First, lacking expertise about the system makes it hard to understand incorrect behavior and to create reasonable hypotheses. 
Second, contemporary debugging approaches provide no or only partial support for the scientific method. In this dissertation, we present test-driven fault navigation as a debugging guide for localizing reproducible failures with the scientific method. Based on the analysis of passing and failing test cases, we reveal anomalies and integrate them into a breadth-first search that leads developers to defects. This systematic search consists of four specific navigation techniques that together support the creation, evaluation, and refinement of failure cause hypotheses for the scientific method. First, structure navigation localizes suspicious system parts and restricts the initial search space. Second, team navigation recommends experienced developers for helping with failures. Third, behavior navigation allows developers to follow emphasized infection chains back to root causes. Fourth, state navigation identifies corrupted state and reveals parts of the infection chain automatically. We implement test-driven fault navigation in our Path Tools framework for the Squeak/Smalltalk development environment and limit its computation cost with the help of our incremental dynamic analysis. This lightweight dynamic analysis ensures an immediate debugging experience with our tools by splitting the run-time overhead over multiple test runs depending on developers' needs. Hence, our test-driven fault navigation in combination with our incremental dynamic analysis answers important questions in a short time: where to start debugging, who understands failure causes best, what happened before failures, and which state properties are infected.}, language = {en} } @phdthesis{Polyvyanyy2012, author = {Polyvyanyy, Artem}, title = {Structuring process models}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-59024}, school = {Universit{\"a}t Potsdam}, year = {2012}, abstract = {One can fairly adopt the ideas of Donald E. 
Knuth to conclude that process modeling is both a science and an art. Process modeling does have an aesthetic sense. Similar to composing an opera or writing a novel, process modeling is carried out by humans who undergo creative practices when engineering a process model. Therefore, the very same process can be modeled in a myriad number of ways. Once modeled, processes can be analyzed by employing scientific methods. Usually, process models are formalized as directed graphs, with nodes representing tasks and decisions, and directed arcs describing temporal constraints between the nodes. Common process definition languages, such as Business Process Model and Notation (BPMN) and Event-driven Process Chain (EPC) allow process analysts to define models with arbitrary complex topologies. The absence of structural constraints supports creativity and productivity, as there is no need to force ideas into a limited amount of available structural patterns. Nevertheless, it is often preferable that models follow certain structural rules. A well-known structural property of process models is (well-)structuredness. A process model is (well-)structured if and only if every node with multiple outgoing arcs (a split) has a corresponding node with multiple incoming arcs (a join), and vice versa, such that the set of nodes between the split and the join induces a single-entry-single-exit (SESE) region; otherwise the process model is unstructured. The motivations for well-structured process models are manifold: (i) Well-structured process models are easier to layout for visual representation as their formalizations are planar graphs. (ii) Well-structured process models are easier to comprehend by humans. (iii) Well-structured process models tend to have fewer errors than unstructured ones and it is less probable to introduce new errors when modifying a well-structured process model. 
(iv) Well-structured process models are better suited for analysis with many existing formal techniques applicable only for well-structured process models. (v) Well-structured process models are better suited for efficient execution and optimization, e.g., when discovering independent regions of a process model that can be executed concurrently. Consequently, there are process modeling languages that encourage well-structured modeling, e.g., Business Process Execution Language (BPEL) and ADEPT. However, the well-structured process modeling implies some limitations: (i) There exist processes that cannot be formalized as well-structured process models. (ii) There exist processes that when formalized as well-structured process models require a considerable duplication of modeling constructs. Rather than expecting well-structured modeling from start, we advocate for the absence of structural constraints when modeling. Afterwards, automated methods can suggest, upon request and whenever possible, alternative formalizations that are "better" structured, preferably well-structured. In this thesis, we study the problem of automatically transforming process models into equivalent well-structured models. The developed transformations are performed under a strong notion of behavioral equivalence which preserves concurrency. The findings are implemented in a tool, which is publicly available.}, language = {en} } @phdthesis{RoggeSolti2014, author = {Rogge-Solti, Andreas}, title = {Probabilistic Estimation of Unobserved Process Events}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-70426}, school = {Universit{\"a}t Potsdam}, year = {2014}, abstract = {Organizations try to gain competitive advantages, and to increase customer satisfaction. To ensure the quality and efficiency of their business processes, they perform business process management. An important part of process management that happens on the daily operational level is process controlling. 
A prerequisite of controlling is process monitoring, i.e., keeping track of the performed activities in running process instances. Only by process monitoring can business analysts detect delays and react to deviations from the expected or guaranteed performance of a process instance. To enable monitoring, process events need to be collected from the process environment. When a business process is orchestrated by a process execution engine, monitoring is available for all orchestrated process activities. Many business processes, however, do not lend themselves to automatic orchestration, e.g., because of required freedom of action. This situation is often encountered in hospitals, where most business processes are manually enacted. Hence, in practice it is often inefficient or infeasible to document and monitor every process activity. Additionally, manual process execution and documentation is prone to errors, e.g., documentation of activities can be forgotten. Thus, organizations face the challenge of process events that occur, but are not observed by the monitoring environment. These unobserved process events can serve as basis for operational process decisions, even without exact knowledge of when they happened or when they will happen. An exemplary decision is whether to invest more resources to manage timely completion of a case, anticipating that the process end event will occur too late. This thesis offers means to reason about unobserved process events in a probabilistic way. We address decisive questions of process managers (e.g., "when will the case be finished?", or "when did we perform the activity that we forgot to document?") in this thesis. As main contribution, we introduce an advanced probabilistic model to business process management that is based on a stochastic variant of Petri nets. We present a holistic approach to use the model effectively along the business process lifecycle. 
Therefore, we provide techniques to discover such models from historical observations, to predict the termination time of processes, and to ensure quality by missing data management. We propose mechanisms to optimize configuration for monitoring and prediction, i.e., to offer guidance in selecting important activities to monitor. An implementation is provided as a proof of concept. For evaluation, we compare the accuracy of the approach with that of state-of-the-art approaches using real process data of a hospital. Additionally, we show its more general applicability in other domains by applying the approach on process data from logistics and finance.}, language = {en} } @phdthesis{Seibel2012, author = {Seibel, Andreas}, title = {Traceability and model management with executable and dynamic hierarchical megamodels}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-64222}, school = {Universit{\"a}t Potsdam}, year = {2012}, abstract = {Nowadays, model-driven engineering (MDE) promises to ease software development by decreasing the inherent complexity of classical software development. In order to deliver on this promise, MDE increases the level of abstraction and automation, through a consideration of domain-specific models (DSMs) and model operations (e.g. model transformations or code generations). DSMs conform to domain-specific modeling languages (DSMLs), which increase the level of abstraction, and model operations are first-class entities of software development because they increase the level of automation. Nevertheless, MDE has to deal with at least two new dimensions of complexity, which are basically caused by the increased linguistic and technological heterogeneity. The first dimension of complexity is setting up an MDE environment, an activity comprised of the implementation or selection of DSMLs and model operations. 
Setting up an MDE environment is both time-consuming and error-prone because of the implementation or adaptation of model operations. The second dimension of complexity is concerned with applying MDE for actual software development. Applying MDE is challenging because a collection of DSMs, which conform to potentially heterogeneous DSMLs, are required to completely specify a complex software system. A single DSML can only be used to describe a specific aspect of a software system at a certain level of abstraction and from a certain perspective. Additionally, DSMs are usually not independent but instead have inherent interdependencies, reflecting (partial) similar aspects of a software system at different levels of abstraction or from different perspectives. A subset of these dependencies are applications of various model operations, which are necessary to keep the degree of automation high. This becomes even worse when addressing the first dimension of complexity. Due to continuous changes, all kinds of dependencies, including the applications of model operations, must also be managed continuously. This comprises maintaining the existence of these dependencies and the appropriate (re-)application of model operations. The contribution of this thesis is an approach that combines traceability and model management to address the aforementioned challenges of configuring and applying MDE for software development. The approach is considered as a traceability approach because it supports capturing and automatically maintaining dependencies between DSMs. The approach is considered as a model management approach because it supports managing the automated (re-)application of heterogeneous model operations. In addition, the approach is considered as a comprehensive model management. 
Since the decomposition of model operations is encouraged to alleviate the first dimension of complexity, the subsequent composition of model operations is required to counteract their fragmentation. A significant portion of this thesis concerns itself with providing a method for the specification of decoupled yet still highly cohesive complex compositions of heterogeneous model operations. The approach supports two different kinds of compositions - data-flow compositions and context compositions. Data-flow composition is used to define a network of heterogeneous model operations coupled by sharing input and output DSMs alone. Context composition is related to a concept used in declarative model transformation approaches to compose individual model transformation rules (units) at any level of detail. In this thesis, context composition provides the ability to use a collection of dependencies as context for the composition of other dependencies, including model operations. In addition, the actual implementation of model operations, which are going to be composed, do not need to implement any composition concerns. The approach is realized by means of a formalism called an executable and dynamic hierarchical megamodel, based on the original idea of megamodels. This formalism supports specifying compositions of dependencies (traceability and model operations). On top of this formalism, traceability is realized by means of a localization concept, and model management by means of an execution concept.}, language = {en} } @phdthesis{Semmo2016, author = {Semmo, Amir}, title = {Design and implementation of non-photorealistic rendering techniques for 3D geospatial data}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-99525}, school = {Universit{\"a}t Potsdam}, pages = {XVI, 155}, year = {2016}, abstract = {Geospatial data has become a natural part of a growing number of information systems and services in the economy, society, and people's personal lives. 
In particular, virtual 3D city and landscape models constitute valuable information sources within a wide variety of applications such as urban planning, navigation, tourist information, and disaster management. Today, these models are often visualized in detail to provide realistic imagery. However, a photorealistic rendering does not automatically lead to high image quality, with respect to an effective information transfer, which requires important or prioritized information to be interactively highlighted in a context-dependent manner. Approaches in non-photorealistic renderings particularly consider a user's task and camera perspective when attempting optimal expression, recognition, and communication of important or prioritized information. However, the design and implementation of non-photorealistic rendering techniques for 3D geospatial data pose a number of challenges, especially when inherently complex geometry, appearance, and thematic data must be processed interactively. Hence, a promising technical foundation is established by the programmable and parallel computing architecture of graphics processing units. This thesis proposes non-photorealistic rendering techniques that enable both the computation and selection of the abstraction level of 3D geospatial model contents according to user interaction and dynamically changing thematic information. To achieve this goal, the techniques integrate with hardware-accelerated rendering pipelines using shader technologies of graphics processing units for real-time image synthesis. The techniques employ principles of artistic rendering, cartographic generalization, and 3D semiotics—unlike photorealistic rendering—to synthesize illustrative renditions of geospatial feature type entities such as water surfaces, buildings, and infrastructure networks. 
In addition, this thesis contributes a generic system that enables to integrate different graphic styles—photorealistic and non-photorealistic—and provide their seamless transition according to user tasks, camera view, and image resolution. Evaluations of the proposed techniques have demonstrated their significance to the field of geospatial information visualization including topics such as spatial perception, cognition, and mapping. In addition, the applications in illustrative and focus+context visualization have reflected their potential impact on optimizing the information transfer regarding factors such as cognitive load, integration of non-realistic information, visualization of uncertainty, and visualization on small displays.}, language = {en} } @phdthesis{Shaabani2020, author = {Shaabani, Nuhad}, title = {On discovering and incrementally updating inclusion dependencies}, doi = {10.25932/publishup-47186}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-471862}, school = {Universit{\"a}t Potsdam}, pages = {119}, year = {2020}, abstract = {In today's world, many applications produce large amounts of data at an enormous rate. Analyzing such datasets for metadata is indispensable for effectively understanding, storing, querying, manipulating, and mining them. Metadata summarizes technical properties of a dataset which range from basic statistics to complex structures describing data dependencies. One type of dependency is the inclusion dependency (IND), which expresses subset-relationships between attributes of datasets. Therefore, inclusion dependencies are important for many data management applications in terms of data integration, query optimization, schema redesign, or integrity checking. So, the discovery of inclusion dependencies in unknown or legacy datasets is at the core of any data profiling effort.
For exhaustively detecting all INDs in large datasets, we developed S-indd++, a new algorithm that eliminates the shortcomings of existing IND-detection algorithms and significantly outperforms them. S-indd++ is based on a novel concept for the attribute clustering for efficiently deriving INDs. Inferring INDs from our attribute clustering eliminates all redundant operations caused by other algorithms. S-indd++ is also based on a novel partitioning strategy that enables discarding a large number of candidates in early phases of the discovering process. Moreover, S-indd++ does not require to fit a partition into the main memory--this is a highly appreciable property in the face of ever-growing datasets. S-indd++ reduces up to 50\% of the runtime of the state-of-the-art approach. None of the approaches for discovering INDs is appropriate for the application on dynamic datasets; they cannot update the INDs after an update of the dataset without reprocessing it entirely. To this end, we developed the first approach for incrementally updating INDs in frequently changing datasets. We achieved that by reducing the problem of incrementally updating INDs to incrementally updating the attribute clustering from which all INDs are efficiently derivable. We realized the update of the clusters by designing new operations to be applied to the clusters after every data update. The incremental update of INDs reduces the time of the complete rediscovery by up to 99.999\%. All existing algorithms for discovering n-ary INDs are based on the principle of candidate generation--they generate candidates and test their validity in the given data instance. The major disadvantage of this technique is the exponentially growing number of database accesses in terms of SQL queries required for validation. We devised Mind2, the first approach for discovering n-ary INDs without candidate generation. 
Mind2 is based on a new mathematical framework developed in this thesis for computing the maximum INDs from which all other n-ary INDs are derivable. The experiments showed that Mind2 is significantly more scalable and effective than hypergraph-based algorithms.}, language = {en} } @phdthesis{Smirnov2011, author = {Smirnov, Sergey}, title = {Business process model abstraction}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-60258}, school = {Universit{\"a}t Potsdam}, year = {2011}, abstract = {Business process models are used within a range of organizational initiatives, where every stakeholder has a unique perspective on a process and demands the respective model. As a consequence, multiple process models capturing the very same business process coexist. Keeping such models in sync is a challenge within an ever changing business environment: once a process is changed, all its models have to be updated. Due to a large number of models and their complex relations, model maintenance becomes error-prone and expensive. Against this background, business process model abstraction emerged as an operation reducing the number of stored process models and facilitating model management. Business process model abstraction is an operation preserving essential process properties and leaving out insignificant details in order to retain information relevant for a particular purpose. Process model abstraction has been addressed by several researchers. The focus of their studies has been on particular use cases and model transformations supporting these use cases. This thesis systematically approaches the problem of business process model abstraction shaping the outcome into a framework. We investigate the current industry demand in abstraction summarizing it in a catalog of business process model abstraction use cases. The thesis focuses on one prominent use case where the user demands a model with coarse-grained activities and overall process ordering constraints. 
We develop model transformations that support this use case starting with the transformations based on process model structure analysis. Further, abstraction methods considering the semantics of process model elements are investigated. First, we suggest how semantically related activities can be discovered in process models-a barely researched challenge. The thesis validates the designed abstraction methods against sets of industrial process models and discusses the method implementation aspects. Second, we develop a novel model transformation, which combined with the related activity discovery allows flexible non-hierarchical abstraction. In this way this thesis advocates novel model transformations that facilitate business process model management and provides the foundations for innovative tool support.}, language = {en} } @phdthesis{Steinert2014, author = {Steinert, Bastian}, title = {Built-in recovery support for explorative programming}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-71305}, school = {Universit{\"a}t Potsdam}, year = {2014}, abstract = {This work introduces concepts and corresponding tool support to enable a complementary approach in dealing with recovery. Programmers need to recover a development state, or a part thereof, when previously made changes reveal undesired implications. However, when the need arises suddenly and unexpectedly, recovery often involves expensive and tedious work. To avoid tedious work, literature recommends keeping away from unexpected recovery demands by following a structured and disciplined approach, which consists of the application of various best practices including working only on one thing at a time, performing small steps, as well as making proper use of versioning and testing tools. However, the attempt to avoid unexpected recovery is both time-consuming and error-prone. On the one hand, it requires disproportionate effort to minimize the risk of unexpected situations. 
On the other hand, applying recommended practices selectively, which saves time, can hardly avoid recovery. In addition, the constant need for foresight and self-control has unfavorable implications. It is exhaustive and impedes creative problem solving. This work proposes to make recovery fast and easy and introduces corresponding support called CoExist. Such dedicated support turns situations of unanticipated recovery from tedious experiences into pleasant ones. It makes recovery fast and easy to accomplish, even if explicit commits are unavailable or tests have been ignored for some time. When mistakes and unexpected insights are no longer associated with tedious corrective actions, programmers are encouraged to change source code as a means to reason about it, as opposed to making changes only after structuring and evaluating them mentally. This work further reports on an implementation of the proposed tool support in the Squeak/Smalltalk development environment. The development of the tools has been accompanied by regular performance and usability tests. In addition, this work investigates whether the proposed tools affect programmers' performance. In a controlled lab study, 22 participants improved the design of two different applications. Using a repeated measurement setup, the study examined the effect of providing CoExist on programming performance. 
The result of analyzing 88 hours of programming suggests that built-in recovery support as provided with CoExist has a positive effect on programming performance in explorative programming tasks.}, language = {en} } @phdthesis{Steinmetz2013, author = {Steinmetz, Nadine}, title = {Context-aware semantic analysis of video metadata}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-70551}, school = {Universit{\"a}t Potsdam}, year = {2013}, abstract = {Im Vergleich zu einer stichwortbasierten Suche erm{\"o}glicht die semantische Suche ein pr{\"a}ziseres und anspruchsvolleres Durchsuchen von (Web)-Dokumenten, weil durch die explizite Semantik Mehrdeutigkeiten von nat{\"u}rlicher Sprache vermieden und semantische Beziehungen in das Suchergebnis einbezogen werden k{\"o}nnen. Eine semantische, Entit{\"a}ten-basierte Suche geht von einer Anfrage mit festgelegter Bedeutung aus und liefert nur Dokumente, die mit dieser Entit{\"a}t annotiert sind als Suchergebnis. Die wichtigste Voraussetzung f{\"u}r eine Entit{\"a}ten-zentrierte Suche stellt die Annotation der Dokumente im Archiv mit Entit{\"a}ten und Kategorien dar. Textuelle Informationen werden analysiert und mit den entsprechenden Entit{\"a}ten und Kategorien versehen, um den Inhalt semantisch erschlie{\ss}en zu k{\"o}nnen. Eine manuelle Annotation erfordert Dom{\"a}nenwissen und ist sehr zeitaufwendig. Die semantische Annotation von Videodokumenten erfordert besondere Aufmerksamkeit, da inhaltsbasierte Metadaten von Videos aus verschiedenen Quellen stammen, verschiedene Eigenschaften und Zuverl{\"a}ssigkeiten besitzen und daher nicht wie Flie{\ss}text behandelt werden k{\"o}nnen. Die vorliegende Arbeit stellt einen semantischen Analyseprozess f{\"u}r Video-Metadaten vor. Die Eigenschaften der verschiedenen Metadatentypen werden analysiert und ein Konfidenzwert ermittelt. Dieser Wert spiegelt die Korrektheit und die wahrscheinliche Mehrdeutigkeit eines Metadatums wider. 
Beginnend mit dem Metadatum mit dem h{\"o}chsten Konfidenzwert wird der Analyseprozess innerhalb eines Kontexts in absteigender Reihenfolge des Konfidenzwerts durchgef{\"u}hrt. Die bereits analysierten Metadaten dienen als Referenzpunkt f{\"u}r die weiteren Analysen. So kann eine m{\"o}glichst korrekte Analyse der heterogen strukturierten Daten eines Kontexts sichergestellt werden. Am Ende der Analyse eines Metadatums wird die f{\"u}r den Kontext relevanteste Entit{\"a}t aus einer Liste von Kandidaten identifiziert - das Metadatum wird disambiguiert. Hierf{\"u}r wurden verschiedene Disambiguierungsalgorithmen entwickelt, die Beschreibungstexte und semantische Beziehungen der Entit{\"a}tenkandidaten zum gegebenen Kontext in Betracht ziehen. Der Kontext f{\"u}r die Disambiguierung wird f{\"u}r jedes Metadatum anhand der Eigenschaften und Konfidenzwerte zusammengestellt. Der vorgestellte Analyseprozess ist an zwei Hypothesen angelehnt: Um die Analyseergebnisse verbessern zu k{\"o}nnen, sollten die Metadaten eines Kontexts in absteigender Reihenfolge ihres Konfidenzwertes verarbeitet werden und die Kontextgrenzen von Videometadaten sollten durch Segmentgrenzen definiert werden, um m{\"o}glichst Kontexte mit koh{\"a}rentem Inhalt zu erhalten. Durch ausf{\"u}hrliche Evaluationen konnten die gestellten Hypothesen best{\"a}tigt werden. Der Analyseprozess wurden gegen mehrere State-of-the-Art Methoden verglichen und erzielt verbesserte Ergebnisse in Bezug auf Recall und Precision, besonders f{\"u}r Metadaten, die aus weniger zuverl{\"a}ssigen Quellen stammen. 
Der Analyseprozess ist Teil eines Videoanalyse-Frameworks und wurde bereits erfolgreich in verschiedenen Projekten eingesetzt.}, language = {en} } @phdthesis{Takouna2014, author = {Takouna, Ibrahim}, title = {Energy-efficient and performance-aware virtual machine management for cloud data centers}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-72399}, school = {Universit{\"a}t Potsdam}, year = {2014}, abstract = {Virtualisierte Cloud Datenzentren stellen nach Bedarf Ressourcen zur Verf{\"u}gung, erm{\"o}glichen agile Ressourcenbereitstellung und beherbergen heterogene Applikationen mit verschiedenen Anforderungen an Ressourcen. Solche Datenzentren verbrauchen enorme Mengen an Energie, was die Erh{\"o}hung der Betriebskosten, der W{\"a}rme innerhalb der Zentren und des Kohlendioxidaussto{\ss}es verursacht. Der Anstieg des Energieverbrauches kann durch ein ineffektives Ressourcenmanagement, das die ineffiziente Ressourcenausnutzung verursacht, entstehen. Die vorliegende Dissertation stellt detaillierte Modelle und neue Verfahren f{\"u}r virtualisiertes Ressourcenmanagement in Cloud Datenzentren vor. Die vorgestellten Verfahren ziehen das Service-Level-Agreement (SLA) und die Heterogenit{\"a}t der Auslastung bez{\"u}glich des Bedarfs an Speicherzugriffen und Kommunikationsmustern von Web- und HPC- (High Performance Computing) Applikationen in Betracht. Um die pr{\"a}sentierten Techniken zu evaluieren, verwenden wir Simulationen und echte Protokollierung der Auslastungen von Web- und HPC- Applikationen. Au{\ss}erdem vergleichen wir unsere Techniken und Verfahren mit anderen aktuellen Verfahren durch die Anwendung von verschiedenen Performance Metriken. Die Hauptbeitr{\"a}ge dieser Dissertation sind Folgendes: Ein Proaktives auf robuster Optimierung basierendes Ressourcenbereitstellungsverfahren. Dieses Verfahren erh{\"o}ht die F{\"a}higkeit der Hosts zur Verf{\"u}gungsstellung von mehr VMs. Gleichzeitig aber wird der unn{\"o}tige Energieverbrauch minimiert. 
Zus{\"a}tzlich mindert diese Technik unerw{\"u}nschte {\"A}nderungen im Energiezustand des Servers. Die vorgestellte Technik nutzt einen auf Intervall basierenden Vorhersagealgorithmus zur Implementierung einer robusten Optimierung. Dabei werden unsichere Anforderungen in Betracht gezogen. Ein adaptives und auf Intervall basierendes Verfahren zur Vorhersage des Arbeitsaufkommens mit hohen, in kurzer Zeit auftretenden Schwankungen. Die Intervall basierende Vorhersage ist implementiert in der Standard Abweichung Variante und in der Median absoluter Abweichung Variante. Die Intervall-{\"A}nderungen basieren auf einem adaptiven Vertrauensfenster um die Schwankungen des Arbeitsaufkommens zu bew{\"a}ltigen. Eine robuste VM Zusammenlegung f{\"u}r ein effizientes Energie und Performance Management. Dies erm{\"o}glicht die gegenseitige Abh{\"a}ngigkeit zwischen der Energie und der Performance zu minimieren. Unser Verfahren reduziert die Anzahl der VM-Migrationen im Vergleich mit den neu vor kurzem vorgestellten Verfahren. Dies tr{\"a}gt auch zur Reduzierung des durch das Netzwerk verursachten Energieverbrauches. Au{\ss}erdem reduziert dieses Verfahren SLA-Verletzungen und die Anzahl von {\"A}nderungen an Energiezust{\"a}nden. Ein generisches Modell f{\"u}r das Netzwerk eines Datenzentrums um die verz{\"o}gerte Kommunikation und ihre Auswirkung auf die VM Performance und auf die Netzwerkenergie zu simulieren. Au{\ss}erdem wird ein generisches Modell f{\"u}r ein Memory-Bus des Servers vorgestellt. Dieses Modell beinhaltet auch Modelle f{\"u}r die Latenzzeit und den Energieverbrauch f{\"u}r verschiedene Memory Frequenzen. Dies erlaubt eine Simulation der Memory Verz{\"o}gerung und ihre Auswirkung auf die VM-Performance und auf den Memory Energieverbrauch. Kommunikation bewusste und Energie effiziente Zusammenlegung f{\"u}r parallele Applikationen um die dynamische Entdeckung von Kommunikationsmustern und das Umplanen von VMs zu erm{\"o}glichen. 
Das Umplanen von VMs benutzt eine auf den entdeckten Kommunikationsmustern basierende Migration. Eine neue Technik zur Entdeckung von dynamischen Mustern ist implementiert. Sie basiert auf der Signal Verarbeitung des Netzwerks von VMs, anstatt die Informationen der virtuellen Umstellung der Hosts oder der Initiierung der VMs zu nutzen. Das Ergebnis zeigt, dass unsere Methode die durchschnittliche Anwendung des Netzwerks reduziert und aufgrund der Reduzierung der aktiven Umstellungen Energie spart. Au{\ss}erdem bietet sie eine bessere VM Performance im Vergleich zu der CPU-basierten Platzierung. Memory bewusste VM Zusammenlegung f{\"u}r unabh{\"a}ngige VMs. Sie nutzt die Vielfalt des VMs Memory Zuganges um die Anwendung vom Memory-Bus der Hosts zu balancieren. Die vorgestellte Technik, Memory-Bus Load Balancing (MLB), verteilt die VMs reaktiv neu im Bezug auf ihre Anwendung vom Memory-Bus. Sie nutzt die VM Migration um die Performance des gesamten Systems zu verbessern. Au{\ss}erdem sind die dynamische Spannung, die Frequenz Skalierung des Memory und die MLB Methode kombiniert um ein besseres Energiesparen zu leisten.}, language = {en} } @phdthesis{Tinnefeld2014, author = {Tinnefeld, Christian}, title = {Building a columnar database on shared main memory-based storage}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-72063}, school = {Universit{\"a}t Potsdam}, pages = {175}, year = {2014}, abstract = {In the field of disk-based parallel database management systems exists a great variety of solutions based on a shared-storage or a shared-nothing architecture. In contrast, main memory-based parallel database management systems are dominated solely by the shared-nothing approach as it preserves the in-memory performance advantage by processing data locally on each server. 
We argue that this unilateral development is going to cease due to the combination of the following three trends: a) Nowadays network technology features remote direct memory access (RDMA) and narrows the performance gap between accessing main memory inside a server and of a remote server to and even below a single order of magnitude. b) Modern storage systems scale gracefully, are elastic, and provide high-availability. c) A modern storage system such as Stanford's RAMCloud even keeps all data resident in main memory. Exploiting these characteristics in the context of a main-memory parallel database management system is desirable. The advent of RDMA-enabled network technology makes the creation of a parallel main memory DBMS based on a shared-storage approach feasible. This thesis describes building a columnar database on shared main memory-based storage. The thesis discusses the resulting architecture (Part I), the implications on query processing (Part II), and presents an evaluation of the resulting solution in terms of performance, high-availability, and elasticity (Part III). In our architecture, we use Stanford's RAMCloud as shared-storage, and the self-designed and developed in-memory AnalyticsDB as relational query processor on top. AnalyticsDB encapsulates data access and operator execution via an interface which allows seamless switching between local and remote main memory, while RAMCloud provides not only storage capacity, but also processing power. Combining both aspects allows pushing-down the execution of database operators into the storage system. We describe how the columnar data processed by AnalyticsDB is mapped to RAMCloud's key-value data model and how the performance advantages of columnar data storage can be preserved. The combination of fast network technology and the possibility to execute database operators in the storage system opens the discussion for site selection. 
We construct a system model that allows the estimation of operator execution costs in terms of network transfer, data processed in memory, and wall time. This can be used for database operators that work on one relation at a time - such as a scan or materialize operation - to discuss the site selection problem (data pull vs. operator push). Since a database query translates to the execution of several database operators, it is possible that the optimal site selection varies per operator. For the execution of a database operator that works on two (or more) relations at a time, such as a join, the system model is enriched by additional factors such as the chosen algorithm (e.g. Grace- vs. Distributed Block Nested Loop Join vs. Cyclo-Join), the data partitioning of the respective relations, and their overlapping as well as the allowed resource allocation. We present an evaluation on a cluster with 60 nodes where all nodes are connected via RDMA-enabled network equipment. We show that query processing performance is about 2.4x slower if everything is done via the data pull operator execution strategy (i.e. RAMCloud is being used only for data access) and about 27\% slower if operator execution is also supported inside RAMCloud (in comparison to operating only on main memory inside a server without any network communication at all). The fast-crash recovery feature of RAMCloud can be leveraged to provide high-availability, e.g. a server crash during query execution only delays the query response for about one second. 
Our solution is elastic in a way that it can adapt to changing workloads a) within seconds, b) without interruption of the ongoing query processing, and c) without manual intervention.}, language = {en} } @phdthesis{Trapp2013, author = {Trapp, Matthias}, title = {Interactive rendering techniques for focus+context visualization of {3D} geovirtual environments}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-66824}, school = {Universit{\"a}t Potsdam}, year = {2013}, abstract = {This thesis introduces a collection of new real-time rendering techniques and applications for focus+context visualization of interactive 3D geovirtual environments such as virtual 3D city and landscape models. These environments are generally characterized by a large number of objects and are of high complexity with respect to geometry and textures. For these reasons, their interactive 3D rendering represents a major challenge. Their 3D depiction implies a number of weaknesses such as occlusions, cluttered image contents, and partial screen-space usage. To overcome these limitations and, thus, to facilitate the effective communication of geo-information, principles of focus+context visualization can be used for the design of real-time 3D rendering techniques for 3D geovirtual environments (see Figure). In general, detailed views of a 3D geovirtual environment are combined seamlessly with abstracted views of the context within a single image. To perform the real-time image synthesis required for interactive visualization, dedicated parallel processors (GPUs) for rasterization of computer graphics primitives are used. For this purpose, the design and implementation of appropriate data structures and rendering pipelines are necessary. 
The contribution of this work comprises the following five real-time rendering methods: • The rendering technique for 3D generalization lenses enables the combination of different 3D city geometries (e.g., generalized versions of a 3D city model) in a single image in real time. The method is based on a generalized and fragment-precise clipping approach, which uses a compressible, raster-based data structure. It enables the combination of detailed views in the focus area with the representation of abstracted variants in the context area. • The rendering technique for the interactive visualization of dynamic raster data in 3D geovirtual environments facilitates the rendering of 2D surface lenses. It enables a flexible combination of different raster layers (e.g., aerial images or videos) using projective texturing for decoupling image and geometry data. Thus, various overlapping and nested 2D surface lenses of different contents can be visualized interactively. • The interactive rendering technique for image-based deformation of 3D geovirtual environments enables the real-time image synthesis of non-planar projections, such as cylindrical and spherical projections, as well as multi-focal 3D fisheye-lenses and the combination of planar and non-planar projections. • The rendering technique for view-dependent multi-perspective views of 3D geovirtual environments, based on the application of global deformations to the 3D scene geometry, can be used for synthesizing interactive panorama maps to combine detailed views close to the camera (focus) with abstract views in the background (context). This approach reduces occlusions, increases the usage of the available screen space, and reduces the overload of image contents. • The object-based and image-based rendering techniques for highlighting objects and focus areas inside and outside the view frustum facilitate preattentive perception. 
The concepts and implementations of interactive image synthesis for focus+context visualization and their selected applications enable a more effective communication of spatial information, and provide building blocks for design and development of new applications and systems in the field of 3D geovirtual environments.}, language = {en} } @phdthesis{Truemper2014, author = {Tr{\"u}mper, Jonas}, title = {Visualization techniques for the analysis of software behavior and related structures}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-72145}, school = {Universit{\"a}t Potsdam}, year = {2014}, abstract = {Software maintenance encompasses any changes made to a software system after its initial deployment and is thereby one of the key phases in the typical software-engineering lifecycle. In software maintenance, we primarily need to understand structural and behavioral aspects, which are difficult to obtain, e.g., by code reading. Software analysis is therefore a vital tool for maintaining these systems: It provides - the preferably automated - means to extract and evaluate information from their artifacts such as software structure, runtime behavior, and related processes. However, such analysis typically results in massive raw data, so that even experienced engineers face difficulties directly examining, assessing, and understanding these data. Among other things, they require tools with which to explore the data if no clear question can be formulated beforehand. For this, software analysis and visualization provide its users with powerful interactive means. These enable the automation of tasks and, particularly, the acquisition of valuable and actionable insights into the raw data. For instance, one means for exploring runtime behavior is trace visualization. This thesis aims at extending and improving the tool set for visual software analysis by concentrating on several open challenges in the fields of dynamic and static analysis of software systems. 
This work develops a series of concepts and tools for the exploratory visualization of the respective data to support users in finding and retrieving information on the system artifacts concerned. This is a difficult task, due to the lack of appropriate visualization metaphors; in particular, the visualization of complex runtime behavior poses various questions and challenges of both a technical and conceptual nature. This work focuses on a set of visualization techniques for visually representing control-flow related aspects of software traces from shared-memory software systems: A trace-visualization concept based on icicle plots aids in understanding both single-threaded as well as multi-threaded runtime behavior on the function level. The concept's extensibility further allows the visualization and analysis of specific aspects of multi-threading such as synchronization, the correlation of such traces with data from static software analysis, and a comparison between traces. Moreover, complementary techniques for simultaneously analyzing system structures and the evolution of related attributes are proposed. These aim at facilitating long-term planning of software architecture and supporting management decisions in software projects by extensions to the circular-bundle-view technique: An extension to 3-dimensional space allows for the use of additional variables simultaneously; interaction techniques allow for the modification of structures in a visual manner. The concepts and techniques presented here are generic and, as such, can be applied beyond software analysis for the visualization of similarly structured data. The techniques' practicability is demonstrated by several qualitative studies using subject data from industry-scale software systems. 
The studies provide initial evidence that the techniques' application yields useful insights into the subject data and its interrelationships in several scenarios.}, language = {en} } @phdthesis{Wang2016, author = {Wang, Cheng}, title = {Deep Learning of Multimodal Representations}, school = {Universit{\"a}t Potsdam}, pages = {142}, year = {2016}, language = {en} } @phdthesis{Wang2011, author = {Wang, Long}, title = {X-tracking the usage interest on web sites}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-51077}, school = {Universit{\"a}t Potsdam}, year = {2011}, abstract = {The exponential expanding of the numbers of web sites and Internet users makes WWW the most important global information resource. From information publishing and electronic commerce to entertainment and social networking, the Web allows an inexpensive and efficient access to the services provided by individuals and institutions. The basic units for distributing these services are the web sites scattered throughout the world. However, the extreme fragility of web services and content, the high competence between similar services supplied by different sites, and the wide geographic distributions of the web users drive the urgent requirement from the web managers to track and understand the usage interest of their web customers. This thesis, "X-tracking the Usage Interest on Web Sites", aims to fulfill this requirement. "X" stands two meanings: one is that the usage interest differs from various web sites, and the other is that usage interest is depicted from multi aspects: internal and external, structural and conceptual, objective and subjective. "Tracking" shows that our concentration is on locating and measuring the differences and changes among usage patterns. 
This thesis presents the methodologies on discovering usage interest on three kinds of web sites: the public information portal site, e-learning site that provides kinds of streaming lectures and social site that supplies the public discussions on IT issues. On different sites, we concentrate on different issues related with mining usage interest. The educational information portal sites were the first implementation scenarios on discovering usage patterns and optimizing the organization of web services. In such cases, the usage patterns are modeled as frequent page sets, navigation paths, navigation structures or graphs. However, a necessary requirement is to rebuild the individual behaviors from usage history. We give a systematic study on how to rebuild individual behaviors. Besides, this thesis shows a new strategy on building content clusters based on pair browsing retrieved from usage logs. The difference between such clusters and the original web structure displays the distance between the destinations from usage side and the expectations from design side. Moreover, we study the problem on tracking the changes of usage patterns in their life cycles. The changes are described from internal side integrating conceptual and structure features, and from external side for the physical features; and described from local side measuring the difference between two time spans, and global side showing the change tendency along the life cycle. A platform, Web-Cares, is developed to discover the usage interest, to measure the difference between usage interest and site expectation and to track the changes of usage patterns. E-learning site provides the teaching materials such as slides, recorded lecture videos and exercise sheets. We focus on discovering the learning interest on streaming lectures, such as real medias, mp4 and flash clips. 
Compared to the information portal site, the usage on streaming lectures encapsulates the variables such as viewing time and actions during learning processes. The learning interest is discovered in the form of answering 6 questions, which covers finding the relations between pieces of lectures and the preference among different forms of lectures. We prefer on detecting the changes of learning interest on the same course from different semesters. The differences on the content and structure between two courses leverage the changes on the learning interest. We give an algorithm on measuring the difference on learning interest integrated with similarity comparison between courses. A search engine, TASK-Moniminer, is created to help the teacher query the learning interest on their streaming lectures on tele-TASK site. Social site acts as an online community attracting web users to discuss the common topics and share their interesting information. Compared to the public information portal site and e-learning web site, the rich interactions among users and web content bring the wider range of content quality, on the other hand, provide more possibilities to express and model usage interest. We propose a framework on finding and recommending high reputation articles in a social site. We observed that the reputation is classified into global and local categories; the quality of the articles having high reputation is related with the content features. 
Based on these observations, our framework is implemented firstly by finding the articles having global or local reputation, and secondly clustering articles based on their content relations, and then the articles are selected and recommended from each cluster based on their reputation ranks.}, language = {en} }

% Weidlich2011: doctoral thesis (Potsdam, 2011) on behavioural profiles for
% consistency analysis between process models. The title uses plain
% "main: sub" punctuation, i.e. no space before the colon.
@phdthesis{Weidlich2011,
  author   = {Weidlich, Matthias},
  title    = {Behavioural profiles: a relational approach to behaviour consistency},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-55590},
  school   = {Universit{\"a}t Potsdam},
  year     = {2011},
  abstract = {Business Process Management (BPM) emerged as a means to control, analyse, and optimise business operations. Conceptual models are of central importance for BPM. Most prominently, process models define the behaviour that is performed to achieve a business value. In essence, a process model is a mapping of properties of the original business process to the model, created for a purpose. Different modelling purposes, therefore, result in different models of a business process. Against this background, the misalignment of process models often observed in the field of BPM is no surprise. Even if the same business scenario is considered, models created for strategic decision making differ in content significantly from models created for process automation. Despite their differences, process models that refer to the same business process should be consistent, i.e., free of contradictions. Apparently, there is a trade-off between strictness of a notion of consistency and appropriateness of process models serving different purposes. Existing work on consistency analysis builds upon behaviour equivalences and hierarchical refinements between process models. Hence, these approaches are computationally hard and do not offer the flexibility to gradually relax consistency requirements towards a certain setting.
This thesis presents a framework for the analysis of behaviour consistency that takes a fundamentally different approach. As a first step, an alignment between corresponding elements of related process models is constructed. Then, this thesis conducts behavioural analysis grounded on a relational abstraction of the behaviour of a process model, its behavioural profile. Different variants of these profiles are proposed, along with efficient computation techniques for a broad class of process models. Using behavioural profiles, consistency of an alignment between process models is judged by different notions and measures. The consistency measures are also adjusted to assess conformance of process logs that capture the observed execution of a process. Further, this thesis proposes various complementary techniques to support consistency management. It elaborates on how to implement consistent change propagation between process models, addresses the exploration of behavioural commonalities and differences, and proposes a model synthesis for behavioural profiles.},
  language = {en}
}

% Wist2011: doctoral thesis (Potsdam, 2011) on STG decomposition for logic
% synthesis of asynchronous (speed independent) circuits. The abstract is kept
% verbatim, including its original line breaks.
@phdthesis{Wist2011,
  author   = {Wist, Dominic},
  title    = {Attacking complexity in logic synthesis of asynchronous circuits},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-59706},
  school   = {Universit{\"a}t Potsdam},
  year     = {2011},
  abstract = {Most of the microelectronic circuits fabricated today are synchronous, i.e. they are driven by one or several clock signals. Synchronous circuit design faces several fundamental challenges such as high-speed clock distribution, integration of multiple cores operating at different clock rates, reduction of power consumption and dealing with voltage, temperature, manufacturing and runtime variations. Asynchronous or clockless design plays a key role in alleviating these challenges, however the design and test of asynchronous circuits is much more difficult in comparison to their synchronous counterparts.
A driving force for a widespread use of asynchronous technology is the availability of mature EDA (Electronic Design Automation) tools which provide an entire automated design flow starting from an HDL (Hardware Description Language) specification yielding the final circuit layout. Even though there was much progress in developing such EDA tools for asynchronous circuit design during the last two decades, the maturity level as well as the acceptance of them is still not comparable with tools for synchronous circuit design. In particular, logic synthesis (which implies the application of Boolean minimisation techniques) for the entire system's control path can significantly improve the efficiency of the resulting asynchronous implementation, e.g. in terms of chip area and performance. However, logic synthesis, in particular for asynchronous circuits, suffers from complexity problems. Signal Transitions Graphs (STGs) are labelled Petri nets which are a widely used to specify the interface behaviour of speed independent (SI) circuits - a robust subclass of asynchronous circuits. STG decomposition is a promising approach to tackle complexity problems like state space explosion in logic synthesis of SI circuits. The (structural) decomposition of STGs is guided by a partition of the output signals and generates a usually much smaller component STG for each partition member, i.e. a component STG with a much smaller state space than the initial specification. However, decomposition can result in component STGs that in isolation have so-called irreducible CSC conflicts (i.e. these components are not SI synthesisable anymore) even if the specification has none of them. A new approach is presented to avoid such conflicts by introducing internal communication between the components. So far, STG decompositions are guided by the finest output partitions, i.e. one output per component. However, this might not yield optimal circuit implementations.
Efficient heuristics are presented to determine coarser partitions leading to improved circuits in terms of chip area. For the new algorithms correctness proofs are given and their implementations are incorporated into the decomposition tool DESIJ. The presented techniques are successfully applied to some benchmarks - including 'real-life' specifications arising in the context of control resynthesis - which delivered promising results.},
  language = {en}
}

% Waetzoldt2016: doctoral thesis (Potsdam, 2016) proposing a modeling language
% for adaptive Systems of Systems. The pages field is kept exactly as exported;
% NOTE(review): "XII, 380" looks like a page count rather than a range -- confirm
% how the bibliography style in use treats it.
@phdthesis{Waetzoldt2016,
  author   = {W{\"a}tzoldt, Sebastian},
  title    = {Modeling collaborations in adaptive systems of systems},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-97494},
  school   = {Universit{\"a}t Potsdam},
  pages    = {XII, 380},
  year     = {2016},
  abstract = {Recently, due to an increasing demand on functionality and flexibility, beforehand isolated systems have become interconnected to gain powerful adaptive Systems of Systems (SoS) solutions with an overall robust, flexible and emergent behavior. The adaptive SoS comprises a variety of different system types ranging from small embedded to adaptive cyber-physical systems. On the one hand, each system is independent, follows a local strategy and optimizes its behavior to reach its goals. On the other hand, systems must cooperate with each other to enrich the overall functionality to jointly perform on the SoS level reaching global goals, which cannot be satisfied by one system alone. Due to difficulties of local and global behavior optimizations conflicts may arise between systems that have to be solved by the adaptive SoS. This thesis proposes a modeling language that facilitates the description of an adaptive SoS by considering the adaptation capabilities in form of feedback loops as first class entities. Moreover, this thesis adopts the Models@runtime approach to integrate the available knowledge in the systems as runtime models into the modeled adaptation logic.
Furthermore, the modeling language focuses on the description of system interactions within the adaptive SoS to reason about individual system functionality and how it emerges via collaborations to an overall joint SoS behavior. Therefore, the modeling language approach enables the specification of local adaptive system behavior, the integration of knowledge in form of runtime models and the joint interactions via collaboration to place the available adaptive behavior in an overall layered, adaptive SoS architecture. Beside the modeling language, this thesis proposes analysis rules to investigate the modeled adaptive SoS, which enables the detection of architectural patterns as well as design flaws and pinpoints to possible system threats. Moreover, a simulation framework is presented, which allows the direct execution of the modeled SoS architecture. Therefore, the analysis rules and the simulation framework can be used to verify the interplay between systems as well as the modeled adaptation effects within the SoS. This thesis realizes the proposed concepts of the modeling language by mapping them to a state of the art standard from the automotive domain and thus, showing their applicability to actual systems. Finally, the modeling language approach is evaluated by remodeling up to date research scenarios from different domains, which demonstrates that the modeling language concepts are powerful enough to cope with a broad range of existing research problems.},
  language = {en}
}