@phdthesis{Steinmetz2013, author = {Steinmetz, Nadine}, title = {Context-aware semantic analysis of video metadata}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-70551}, school = {Universit{\"a}t Potsdam}, year = {2013}, abstract = {Im Vergleich zu einer stichwortbasierten Suche erm{\"o}glicht die semantische Suche ein pr{\"a}ziseres und anspruchsvolleres Durchsuchen von (Web)-Dokumenten, weil durch die explizite Semantik Mehrdeutigkeiten von nat{\"u}rlicher Sprache vermieden und semantische Beziehungen in das Suchergebnis einbezogen werden k{\"o}nnen. Eine semantische, Entit{\"a}ten-basierte Suche geht von einer Anfrage mit festgelegter Bedeutung aus und liefert nur Dokumente, die mit dieser Entit{\"a}t annotiert sind als Suchergebnis. Die wichtigste Voraussetzung f{\"u}r eine Entit{\"a}ten-zentrierte Suche stellt die Annotation der Dokumente im Archiv mit Entit{\"a}ten und Kategorien dar. Textuelle Informationen werden analysiert und mit den entsprechenden Entit{\"a}ten und Kategorien versehen, um den Inhalt semantisch erschließen zu k{\"o}nnen. Eine manuelle Annotation erfordert Dom{\"a}nenwissen und ist sehr zeitaufwendig. Die semantische Annotation von Videodokumenten erfordert besondere Aufmerksamkeit, da inhaltsbasierte Metadaten von Videos aus verschiedenen Quellen stammen, verschiedene Eigenschaften und Zuverl{\"a}ssigkeiten besitzen und daher nicht wie Fließtext behandelt werden k{\"o}nnen. Die vorliegende Arbeit stellt einen semantischen Analyseprozess f{\"u}r Video-Metadaten vor. Die Eigenschaften der verschiedenen Metadatentypen werden analysiert und ein Konfidenzwert ermittelt. Dieser Wert spiegelt die Korrektheit und die wahrscheinliche Mehrdeutigkeit eines Metadatums wieder. Beginnend mit dem Metadatum mit dem h{\"o}chsten Konfidenzwert wird der Analyseprozess innerhalb eines Kontexts in absteigender Reihenfolge des Konfidenzwerts durchgef{\"u}hrt. Die bereits analysierten Metadaten dienen als Referenzpunkt f{\"u}r die weiteren Analysen. So kann eine m{\"o}glichst korrekte Analyse der heterogen strukturierten Daten eines Kontexts sichergestellt werden. Am Ende der Analyse eines Metadatums wird die f{\"u}r den Kontext relevanteste Entit{\"a}t aus einer Liste von Kandidaten identifiziert - das Metadatum wird disambiguiert. Hierf{\"u}r wurden verschiedene Disambiguierungsalgorithmen entwickelt, die Beschreibungstexte und semantische Beziehungen der Entit{\"a}tenkandidaten zum gegebenen Kontext in Betracht ziehen. Der Kontext f{\"u}r die Disambiguierung wird f{\"u}r jedes Metadatum anhand der Eigenschaften und Konfidenzwerte zusammengestellt. Der vorgestellte Analyseprozess ist an zwei Hypothesen angelehnt: Um die Analyseergebnisse verbessern zu k{\"o}nnen, sollten die Metadaten eines Kontexts in absteigender Reihenfolge ihres Konfidenzwertes verarbeitet werden und die Kontextgrenzen von Videometadaten sollten durch Segmentgrenzen definiert werden, um m{\"o}glichst Kontexte mit koh{\"a}rentem Inhalt zu erhalten. Durch ausf{\"u}hrliche Evaluationen konnten die gestellten Hypothesen best{\"a}tigt werden. Der Analyseprozess wurden gegen mehrere State-of-the-Art Methoden verglichen und erzielt verbesserte Ergebnisse in Bezug auf Recall und Precision, besonders f{\"u}r Metadaten, die aus weniger zuverl{\"a}ssigen Quellen stammen. Der Analyseprozess ist Teil eines Videoanalyse-Frameworks und wurde bereits erfolgreich in verschiedenen Projekten eingesetzt.}, language = {en} } @phdthesis{RoggeSolti2014, author = {Rogge-Solti, Andreas}, title = {Probabilistic Estimation of Unobserved Process Events}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-70426}, school = {Universit{\"a}t Potsdam}, year = {2014}, abstract = {Organizations try to gain competitive advantages, and to increase customer satisfaction. To ensure the quality and efficiency of their business processes, they perform business process management. An important part of process management that happens on the daily operational level is process controlling. A prerequisite of controlling is process monitoring, i.e., keeping track of the performed activities in running process instances. Only by process monitoring can business analysts detect delays and react to deviations from the expected or guaranteed performance of a process instance. To enable monitoring, process events need to be collected from the process environment. When a business process is orchestrated by a process execution engine, monitoring is available for all orchestrated process activities. Many business processes, however, do not lend themselves to automatic orchestration, e.g., because of required freedom of action. This situation is often encountered in hospitals, where most business processes are manually enacted. Hence, in practice it is often inefficient or infeasible to document and monitor every process activity. Additionally, manual process execution and documentation is prone to errors, e.g., documentation of activities can be forgotten. Thus, organizations face the challenge of process events that occur, but are not observed by the monitoring environment. These unobserved process events can serve as basis for operational process decisions, even without exact knowledge of when they happened or when they will happen. An exemplary decision is whether to invest more resources to manage timely completion of a case, anticipating that the process end event will occur too late. This thesis offers means to reason about unobserved process events in a probabilistic way. We address decisive questions of process managers (e.g., "when will the case be finished?", or "when did we perform the activity that we forgot to document?") in this thesis. As main contribution, we introduce an advanced probabilistic model to business process management that is based on a stochastic variant of Petri nets. We present a holistic approach to use the model effectively along the business process lifecycle. Therefore, we provide techniques to discover such models from historical observations, to predict the termination time of processes, and to ensure quality by missing data management. We propose mechanisms to optimize configuration for monitoring and prediction, i.e., to offer guidance in selecting important activities to monitor. An implementation is provided as a proof of concept. For evaluation, we compare the accuracy of the approach with that of state-of-the-art approaches using real process data of a hospital. Additionally, we show its more general applicability in other domains by applying the approach on process data from logistics and finance.}, language = {en} } @phdthesis{Perscheid2013, author = {Perscheid, Michael}, title = {Test-driven fault navigation for debugging reproducible failures}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-68155}, school = {Universit{\"a}t Potsdam}, year = {2013}, abstract = {The correction of software failures tends to be very cost-intensive because their debugging is an often time-consuming development activity. During this activity, developers largely attempt to understand what causes failures: Starting with a test case that reproduces the observable failure they have to follow failure causes on the infection chain back to the root cause (defect). This idealized procedure requires deep knowledge of the system and its behavior because failures and defects can be far apart from each other. Unfortunately, common debugging tools are inadequate for systematically investigating such infection chains in detail. Thus, developers have to rely primarily on their intuition and the localization of failure causes is not time-efficient. To prevent debugging by disorganized trial and error, experienced developers apply the scientific method and its systematic hypothesis-testing. However, even when using the scientific method, the search for failure causes can still be a laborious task. First, lacking expertise about the system makes it hard to understand incorrect behavior and to create reasonable hypotheses. Second, contemporary debugging approaches provide no or only partial support for the scientific method. In this dissertation, we present test-driven fault navigation as a debugging guide for localizing reproducible failures with the scientific method. Based on the analysis of passing and failing test cases, we reveal anomalies and integrate them into a breadth-first search that leads developers to defects. This systematic search consists of four specific navigation techniques that together support the creation, evaluation, and refinement of failure cause hypotheses for the scientific method. First, structure navigation localizes suspicious system parts and restricts the initial search space. Second, team navigation recommends experienced developers for helping with failures. Third, behavior navigation allows developers to follow emphasized infection chains back to root causes. Fourth, state navigation identifies corrupted state and reveals parts of the infection chain automatically. We implement test-driven fault navigation in our Path Tools framework for the Squeak/Smalltalk development environment and limit its computation cost with the help of our incremental dynamic analysis. This lightweight dynamic analysis ensures an immediate debugging experience with our tools by splitting the run-time overhead over multiple test runs depending on developers' needs. Hence, our test-driven fault navigation in combination with our incremental dynamic analysis answers important questions in a short time: where to start debugging, who understands failure causes best, what happened before failures, and which state properties are infected.}, language = {en} } @phdthesis{Lorey2014, author = {Lorey, Johannes}, title = {What's in a query : analyzing, predicting, and managing linked data access}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-72312}, school = {Universit{\"a}t Potsdam}, year = {2014}, abstract = {The term Linked Data refers to connected information sources comprising structured data about a wide range of topics and for a multitude of applications. In recent years, the conceptional and technical foundations of Linked Data have been formalized and refined. To this end, well-known technologies have been established, such as the Resource Description Framework (RDF) as a Linked Data model or the SPARQL Protocol and RDF Query Language (SPARQL) for retrieving this information. Whereas most research has been conducted in the area of generating and publishing Linked Data, this thesis presents novel approaches for improved management. In particular, we illustrate new methods for analyzing and processing SPARQL queries. Here, we present two algorithms suitable for identifying structural relationships between these queries. Both algorithms are applied to a large number of real-world requests to evaluate the performance of the approaches and the quality of their results. Based on this, we introduce different strategies enabling optimized access of Linked Data sources. We demonstrate how the presented approach facilitates effective utilization of SPARQL endpoints by prefetching results relevant for multiple subsequent requests. Furthermore, we contribute a set of metrics for determining technical characteristics of such knowledge bases. To this end, we devise practical heuristics and validate them through thorough analysis of real-world data sources. We discuss the findings and evaluate their impact on utilizing the endpoints. Moreover, we detail the adoption of a scalable infrastructure for improving Linked Data discovery and consumption. As we outline in an exemplary use case, this platform is eligible both for processing and provisioning the corresponding information.}, language = {en} } @phdthesis{Hebig2014, author = {Hebig, Regina}, title = {Evolution of model-driven engineering settings in practice}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-70761}, school = {Universit{\"a}t Potsdam}, year = {2014}, abstract = {Nowadays, software systems are getting more and more complex. To tackle this challenge most diverse techniques, such as design patterns, service oriented architectures (SOA), software development processes, and model-driven engineering (MDE), are used to improve productivity, while time to market and quality of the products stay stable. Multiple of these techniques are used in parallel to profit from their benefits. While the use of sophisticated software development processes is standard, today, MDE is just adopted in practice. However, research has shown that the application of MDE is not always successful. It is not fully understood when advantages of MDE can be used and to what degree MDE can also be disadvantageous for productivity. Further, when combining different techniques that aim to affect the same factor (e.g. productivity) the question arises whether these techniques really complement each other or, in contrast, compensate their effects. Due to that, there is the concrete question how MDE and other techniques, such as software development process, are interrelated. Both aspects (advantages and disadvantages for productivity as well as the interrelation to other techniques) need to be understood to identify risks relating to the productivity impact of MDE. Before studying MDE's impact on productivity, it is necessary to investigate the range of validity that can be reached for the results. This includes two questions. First, there is the question whether MDE's impact on productivity is similar for all approaches of adopting MDE in practice. Second, there is the question whether MDE's impact on productivity for an approach of using MDE in practice remains stable over time. The answers for both questions are crucial for handling risks of MDE, but also for the design of future studies on MDE success. This thesis addresses these questions with the goal to support adoption of MDE in future. To enable a differentiated discussion about MDE, the term MDE setting'' is introduced. MDE setting refers to the applied technical setting, i.e. the employed manual and automated activities, artifacts, languages, and tools. An MDE setting's possible impact on productivity is studied with a focus on changeability and the interrelation to software development processes. This is done by introducing a taxonomy of changeability concerns that might be affected by an MDE setting. Further, three MDE traits are identified and it is studied for which manifestations of these MDE traits software development processes are impacted. To enable the assessment and evaluation of an MDE setting's impacts, the Software Manufacture Model language is introduced. This is a process modeling language that allows to reason about how relations between (modeling) artifacts (e.g. models or code files) change during application of manual or automated development activities. On that basis, risk analysis techniques are provided. These techniques allow identifying changeability risks and assessing the manifestations of the MDE traits (and with it an MDE setting's impact on software development processes). To address the range of validity, MDE settings from practice and their evolution histories were capture in context of this thesis. First, this data is used to show that MDE settings cover the whole spectrum concerning their impact on changeability or interrelation to software development processes. Neither it is seldom that MDE settings are neutral for processes nor is it seldom that MDE settings have impact on processes. Similarly, the impact on changeability differs relevantly. Second, a taxonomy of evolution of MDE settings is introduced. In that context it is discussed to what extent different types of changes on an MDE setting can influence this MDE setting's impact on changeability and the interrelation to processes. The category of structural evolution, which can change these characteristics of an MDE setting, is identified. The captured MDE settings from practice are used to show that structural evolution exists and is common. In addition, some examples of structural evolution steps are collected that actually led to a change in the characteristics of the respective MDE settings. Two implications are: First, the assessed diversity of MDE settings evaluates the need for the analysis techniques that shall be presented in this thesis. Second, evolution is one explanation for the diversity of MDE settings in practice. To summarize, this thesis studies the nature and evolution of MDE settings in practice. As a result support for the adoption of MDE settings is provided in form of techniques for the identification of risks relating to productivity impacts.}, language = {en} } @phdthesis{Gumienny2013, author = {Gumienny, Raja Carola}, title = {Understanding the adoption of digital whiteboard systems for collaborative design work}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-72417}, school = {Universit{\"a}t Potsdam}, year = {2013}, abstract = {User-centered design processes are the first choice when new interactive systems or services are developed to address real customer needs and provide a good user experience. Common tools for collecting user research data, conducting brainstormings, or sketching ideas are whiteboards and sticky notes. They are ubiquitously available, and no technical or domain knowledge is necessary to use them. However, traditional pen and paper tools fall short when saving the content and sharing it with others unable to be in the same location. They are also missing further digital advantages such as searching or sorting content. Although research on digital whiteboard and sticky note applications has been conducted for over 20 years, these tools are not widely adopted in company contexts. While many research prototypes exist, they have not been used for an extended period of time in a real-world context. The goal of this thesis is to investigate what the enablers and obstacles for the adoption of digital whiteboard systems are. As an instrument for different studies, we developed the Tele-Board software system for collaborative creative work. Based on interviews, observations, and findings from former research, we tried to transfer the analog way of working to the digital world. Being a software system, Tele-Board can be used with a variety of hardware and does not depend on special devices. This feature became one of the main factors for adoption on a larger scale. In this thesis, I will present three studies on the use of Tele-Board with different user groups and foci. I will use a combination of research methods (laboratory case studies and data from field research) with the overall goal of finding out when a digital whiteboard system is used and in which cases not. Not surprisingly, the system is used and accepted if a user sees a main benefit that neither analog tools nor other applications can offer. However, I found that these perceived benefits are very different for each user and usage context. If a tool provides possibilities to use in different ways and with different equipment, the chances of its adoption by a larger group increase. Tele-Board has now been in use for over 1.5 years in a global IT company in at least five countries with a constantly growing user base. Its use, advantages, and disadvantages will be described based on 42 interviews and usage statistics from server logs. Through these insights and findings from laboratory case studies, I will present a detailed analysis of digital whiteboard use in different contexts with design implications for future systems.}, language = {en} } @phdthesis{Dawoud2013, author = {Dawoud, Wesam}, title = {Scalability and performance management of internet applications in the cloud}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-68187}, school = {Universit{\"a}t Potsdam}, year = {2013}, abstract = {Cloud computing is a model for enabling on-demand access to a shared pool of computing resources. With virtually limitless on-demand resources, a cloud environment enables the hosted Internet application to quickly cope when there is an increase in the workload. However, the overhead of provisioning resources exposes the Internet application to periods of under-provisioning and performance degradation. Moreover, the performance interference, due to the consolidation in the cloud environment, complicates the performance management of the Internet applications. In this dissertation, we propose two approaches to mitigate the impact of the resources provisioning overhead. The first approach employs control theory to scale resources vertically and cope fast with workload. This approach assumes that the provider has knowledge and control over the platform running in the virtual machines (VMs), which limits it to Platform as a Service (PaaS) and Software as a Service (SaaS) providers. The second approach is a customer-side one that deals with the horizontal scalability in an Infrastructure as a Service (IaaS) model. It addresses the trade-off problem between cost and performance with a multi-goal optimization solution. This approach finds the scale thresholds that achieve the highest performance with the lowest increase in the cost. Moreover, the second approach employs a proposed time series forecasting algorithm to scale the application proactively and avoid under-utilization periods. Furthermore, to mitigate the interference impact on the Internet application performance, we developed a system which finds and eliminates the VMs suffering from performance interference. The developed system is a light-weight solution which does not imply provider involvement. To evaluate our approaches and the designed algorithms at large-scale level, we developed a simulator called (ScaleSim). In the simulator, we implemented scalability components acting as the scalability components of Amazon EC2. The current scalability implementation in Amazon EC2 is used as a reference point for evaluating the improvement in the scalable application performance. ScaleSim is fed with realistic models of the RUBiS benchmark extracted from the real environment. The workload is generated from the access logs of the 1998 world cup website. The results show that optimizing the scalability thresholds and adopting proactive scalability can mitigate 88\% of the resources provisioning overhead impact with only a 9\% increase in the cost.}, language = {en} }