@phdthesis{AlSaffar2016, author = {Al-Saffar, Loay Talib Ahmed}, title = {Analysing prerequisites, expectations, apprehensions, and attitudes of university students studying Computer science}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-98437}, school = {Universit{\"a}t Potsdam}, pages = {xii, 131}, year = {2016}, abstract = {The main objective of this dissertation is to analyse prerequisites, expectations, apprehensions, and attitudes of students studying computer science, who are willing to gain a bachelor degree. The research will also investigate in the students' learning style according to the Felder-Silverman model. These investigations fall in the attempt to make an impact on reducing the "dropout"/shrinkage rate among students, and to suggest a better learning environment. The first investigation starts with a survey that has been made at the computer science department at the University of Baghdad to investigate the attitudes of computer science students in an environment dominated by women, showing the differences in attitudes between male and female students in different study years. Students are accepted to university studies via a centrally controlled admission procedure depending mainly on their final score at school. This leads to a high percentage of students studying subjects they do not want. Our analysis shows that 75\% of the female students do not regret studying computer science although it was not their first choice. And according to statistics over previous years, women manage to succeed in their study and often graduate on top of their class. We finish with a comparison of attitudes between the freshman students of two different cultures and two different university enrolment procedures (University of Baghdad, in Iraq, and the University of Potsdam, in Germany) both with opposite gender majority. The second step of investigation took place at the department of computer science at the University of Potsdam in Germany and analyzes the learning styles of students studying the three major fields of study offered by the department (computer science, business informatics, and computer science teaching). Investigating the differences in learning styles between the students of those study fields who usually take some joint courses is important to be aware of which changes are necessary to be adopted in the teaching methods to address those different students. It was a two stage study using two questionnaires; the main one is based on the Index of Learning Styles Questionnaire of B. A. Solomon and R. M. Felder, and the second questionnaire was an investigation on the students' attitudes towards the findings of their personal first questionnaire. Our analysis shows differences in the preferences of learning style between male and female students of the different study fields, as well as differences between students with the different specialties (computer science, business informatics, and computer science teaching). The third investigation looks closely into the difficulties, issues, apprehensions and expectations of freshman students studying computer science. The study took place at the computer science department at the University of Potsdam with a volunteer sample of students. The goal is to determine and discuss the difficulties and issues that they are facing in their study that may lead them to think in dropping-out, changing the study field, or changing the university. The research continued with the same sample of students (with business informatics students being the majority) through more than three semesters. Difficulties and issues during the study were documented, as well as students' attitudes, apprehensions, and expectations. Some of the professors and lecturers opinions and solutions to some students' problems were also documented. Many participants had apprehensions and difficulties, especially towards informatics subjects. Some business informatics participants began to think of changing the university, in particular when they reached their third semester, others thought about changing their field of study. Till the end of this research, most of the participants continued in their studies (the study they have started with or the new study they have changed to) without leaving the higher education system.}, language = {en} } @phdthesis{Wolf2021, author = {Wolf, Johannes}, title = {Analysis and visualization of transport infrastructure based on large-scale geospatial mobile mapping data}, doi = {10.25932/publishup-53612}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-536129}, school = {Universit{\"a}t Potsdam}, pages = {vi, 121}, year = {2021}, abstract = {3D point clouds are a universal and discrete digital representation of three-dimensional objects and environments. For geospatial applications, 3D point clouds have become a fundamental type of raw data acquired and generated using various methods and techniques. In particular, 3D point clouds serve as raw data for creating digital twins of the built environment. This thesis concentrates on the research and development of concepts, methods, and techniques for preprocessing, semantically enriching, analyzing, and visualizing 3D point clouds for applications around transport infrastructure. It introduces a collection of preprocessing techniques that aim to harmonize raw 3D point cloud data, such as point density reduction and scan profile detection. Metrics such as, e.g., local density, verticality, and planarity are calculated for later use. One of the key contributions tackles the problem of analyzing and deriving semantic information in 3D point clouds. Three different approaches are investigated: a geometric analysis, a machine learning approach operating on synthetically generated 2D images, and a machine learning approach operating on 3D point clouds without intermediate representation. In the first application case, 2D image classification is applied and evaluated for mobile mapping data focusing on road networks to derive road marking vector data. The second application case investigates how 3D point clouds can be merged with ground-penetrating radar data for a combined visualization and to automatically identify atypical areas in the data. For example, the approach detects pavement regions with developing potholes. The third application case explores the combination of a 3D environment based on 3D point clouds with panoramic imagery to improve visual representation and the detection of 3D objects such as traffic signs. The presented methods were implemented and tested based on software frameworks for 3D point clouds and 3D visualization. In particular, modules for metric computation, classification procedures, and visualization techniques were integrated into a modular pipeline-based C++ research framework for geospatial data processing, extended by Python machine learning scripts. All visualization and analysis techniques scale to large real-world datasets such as road networks of entire cities or railroad networks. The thesis shows that some use cases allow taking advantage of established image vision methods to analyze images rendered from mobile mapping data efficiently. The two presented semantic classification methods working directly on 3D point clouds are use case independent and show similar overall accuracy when compared to each other. While the geometry-based method requires less computation time, the machine learning-based method supports arbitrary semantic classes but requires training the network with ground truth data. Both methods can be used in combination to gradually build this ground truth with manual corrections via a respective annotation tool. This thesis contributes results for IT system engineering of applications, systems, and services that require spatial digital twins of transport infrastructure such as road networks and railroad networks based on 3D point clouds as raw data. It demonstrates the feasibility of fully automated data flows that map captured 3D point clouds to semantically classified models. This provides a key component for seamlessly integrated spatial digital twins in IT solutions that require up-to-date, object-based, and semantically enriched information about the built environment.}, language = {en} } @phdthesis{Floeter2005, author = {Fl{\"o}ter, Andr{\´e}}, title = {Analyzing biological expression data based on decision tree induction}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-6416}, school = {Universit{\"a}t Potsdam}, year = {2005}, abstract = {Modern biological analysis techniques supply scientists with various forms of data. One category of such data are the so called "expression data". These data indicate the quantities of biochemical compounds present in tissue samples. Recently, expression data can be generated at a high speed. This leads in turn to amounts of data no longer analysable by classical statistical techniques. Systems biology is the new field that focuses on the modelling of this information. At present, various methods are used for this purpose. One superordinate class of these meth­ods is machine learning. Methods of this kind had, until recently, predominantly been used for classification and prediction tasks. This neglected a powerful secondary benefit: the ability to induce interpretable models. Obtaining such models from data has become a key issue within Systems biology. Numerous approaches have been proposed and intensively discussed. This thesis focuses on the examination and exploitation of one basic technique: decision trees. The concept of comparing sets of decision trees is developed. This method offers the pos­sibility of identifying significant thresholds in continuous or discrete valued attributes through their corresponding set of decision trees. Finding significant thresholds in attributes is a means of identifying states in living organisms. Knowing about states is an invaluable clue to the un­derstanding of dynamic processes in organisms. Applied to metabolite concentration data, the proposed method was able to identify states which were not found with conventional techniques for threshold extraction. A second approach exploits the structure of sets of decision trees for the discovery of com­binatorial dependencies between attributes. Previous work on this issue has focused either on expensive computational methods or the interpretation of single decision trees ­ a very limited exploitation of the data. This has led to incomplete or unstable results. That is why a new method is developed that uses sets of decision trees to overcome these limitations. Both the introduced methods are available as software tools. They can be applied consecu­tively or separately. That way they make up a package of analytical tools that usefully supplement existing methods. By means of these tools, the newly introduced methods were able to confirm existing knowl­edge and to suggest interesting and new relationships between metabolites.}, subject = {Molekulare Bioinformatik}, language = {en} } @phdthesis{Scholz2006, author = {Scholz, Matthias}, title = {Approaches to analyse and interpret biological profile data}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-7839}, school = {Universit{\"a}t Potsdam}, year = {2006}, abstract = {Advances in biotechnologies rapidly increase the number of molecules of a cell which can be observed simultaneously. This includes expression levels of thousands or ten-thousands of genes as well as concentration levels of metabolites or proteins. Such Profile data, observed at different times or at different experimental conditions (e.g., heat or dry stress), show how the biological experiment is reflected on the molecular level. This information is helpful to understand the molecular behaviour and to identify molecules or combination of molecules that characterise specific biological condition (e.g., disease). This work shows the potentials of component extraction algorithms to identify the major factors which influenced the observed data. This can be the expected experimental factors such as the time or temperature as well as unexpected factors such as technical artefacts or even unknown biological behaviour. Extracting components means to reduce the very high-dimensional data to a small set of new variables termed components. Each component is a combination of all original variables. The classical approach for that purpose is the principal component analysis (PCA). It is shown that, in contrast to PCA which maximises the variance only, modern approaches such as independent component analysis (ICA) are more suitable for analysing molecular data. The condition of independence between components of ICA fits more naturally our assumption of individual (independent) factors which influence the data. This higher potential of ICA is demonstrated by a crossing experiment of the model plant Arabidopsis thaliana (Thale Cress). The experimental factors could be well identified and, in addition, ICA could even detect a technical artefact. However, in continuously observations such as in time experiments, the data show, in general, a nonlinear distribution. To analyse such nonlinear data, a nonlinear extension of PCA is used. This nonlinear PCA (NLPCA) is based on a neural network algorithm. The algorithm is adapted to be applicable to incomplete molecular data sets. Thus, it provides also the ability to estimate the missing data. The potential of nonlinear PCA to identify nonlinear factors is demonstrated by a cold stress experiment of Arabidopsis thaliana. The results of component analysis can be used to build a molecular network model. Since it includes functional dependencies it is termed functional network. Applied to the cold stress data, it is shown that functional networks are appropriate to visualise biological processes and thereby reveals molecular dynamics.}, subject = {Bioinformatik}, language = {en} } @phdthesis{Becker2013, author = {Becker, Basil}, title = {Architectural modelling and verification of open service-oriented systems of systems}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-70158}, school = {Universit{\"a}t Potsdam}, year = {2013}, abstract = {Systems of Systems (SoS) have received a lot of attention recently. In this thesis we will focus on SoS that are built atop the techniques of Service-Oriented Architectures and thus combine the benefits and challenges of both paradigms. For this thesis we will understand SoS as ensembles of single autonomous systems that are integrated to a larger system, the SoS. The interesting fact about these systems is that the previously isolated systems are still maintained, improved and developed on their own. Structural dynamics is an issue in SoS, as at every point in time systems can join and leave the ensemble. This and the fact that the cooperation among the constituent systems is not necessarily observable means that we will consider these systems as open systems. Of course, the system has a clear boundary at each point in time, but this can only be identified by halting the complete SoS. However, halting a system of that size is practically impossible. Often SoS are combinations of software systems and physical systems. Hence a failure in the software system can have a serious physical impact what makes an SoS of this kind easily a safety-critical system. The contribution of this thesis is a modelling approach that extends OMG's SoaML and basically relies on collaborations and roles as an abstraction layer above the components. This will allow us to describe SoS at an architectural level. We will also give a formal semantics for our modelling approach which employs hybrid graph-transformation systems. The modelling approach is accompanied by a modular verification scheme that will be able to cope with the complexity constraints implied by the SoS' structural dynamics and size. Building such autonomous systems as SoS without evolution at the architectural level --- i. e. adding and removing of components and services --- is inadequate. Therefore our approach directly supports the modelling and verification of evolution.}, language = {en} } @phdthesis{Ishebabi2010, author = {Ishebabi, Harold}, title = {Architecture synthesis for adaptive multiprocessor systems on chip}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-41316}, school = {Universit{\"a}t Potsdam}, year = {2010}, abstract = {This thesis presents methods for automated synthesis of flexible chip multiprocessor systems from parallel programs targeted at FPGAs to exploit both task-level parallelism and architecture customization. Automated synthesis is necessitated by the complexity of the design space. A detailed description of the design space is provided in order to determine which parameters should be modeled to facilitate automated synthesis by optimizing a cost function, the emphasis being placed on inclusive modeling of parameters from application, architectural and physical subspaces, as well as their joint coverage in order to avoid pre-constraining the design space. Given a parallel program and a set of an IP library, the automated synthesis problem is to simultaneously (i) select processors (ii) map and schedule tasks to them, and (iii) select one or several networks for inter-task communications such that design constraints and optimization objectives are met. The research objective in this thesis is to find a suitable model for automated synthesis, and to evaluate methods of using the model for architectural optimizations. Our contributions are a holistic approach for the design of such systems, corresponding models to facilitate automated synthesis, evaluation of optimization methods using state of the art integer linear and answer set programming, as well as the development of synthesis heuristics to solve runtime challenges.}, language = {en} } @phdthesis{Wist2011, author = {Wist, Dominic}, title = {Attacking complexity in logic synthesis of asynchronous circuits}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-59706}, school = {Universit{\"a}t Potsdam}, year = {2011}, abstract = {Most of the microelectronic circuits fabricated today are synchronous, i.e. they are driven by one or several clock signals. Synchronous circuit design faces several fundamental challenges such as high-speed clock distribution, integration of multiple cores operating at different clock rates, reduction of power consumption and dealing with voltage, temperature, manufacturing and runtime variations. Asynchronous or clockless design plays a key role in alleviating these challenges, however the design and test of asynchronous circuits is much more difficult in comparison to their synchronous counterparts. A driving force for a widespread use of asynchronous technology is the availability of mature EDA (Electronic Design Automation) tools which provide an entire automated design flow starting from an HDL (Hardware Description Language) specification yielding the final circuit layout. Even though there was much progress in developing such EDA tools for asynchronous circuit design during the last two decades, the maturity level as well as the acceptance of them is still not comparable with tools for synchronous circuit design. In particular, logic synthesis (which implies the application of Boolean minimisation techniques) for the entire system's control path can significantly improve the efficiency of the resulting asynchronous implementation, e.g. in terms of chip area and performance. However, logic synthesis, in particular for asynchronous circuits, suffers from complexity problems. Signal Transitions Graphs (STGs) are labelled Petri nets which are a widely used to specify the interface behaviour of speed independent (SI) circuits - a robust subclass of asynchronous circuits. STG decomposition is a promising approach to tackle complexity problems like state space explosion in logic synthesis of SI circuits. The (structural) decomposition of STGs is guided by a partition of the output signals and generates a usually much smaller component STG for each partition member, i.e. a component STG with a much smaller state space than the initial specification. However, decomposition can result in component STGs that in isolation have so-called irreducible CSC conflicts (i.e. these components are not SI synthesisable anymore) even if the specification has none of them. A new approach is presented to avoid such conflicts by introducing internal communication between the components. So far, STG decompositions are guided by the finest output partitions, i.e. one output per component. However, this might not yield optimal circuit implementations. Efficient heuristics are presented to determine coarser partitions leading to improved circuits in terms of chip area. For the new algorithms correctness proofs are given and their implementations are incorporated into the decomposition tool DESIJ. The presented techniques are successfully applied to some benchmarks - including 'real-life' specifications arising in the context of control resynthesis - which delivered promising results.}, language = {en} } @phdthesis{Weise2021, author = {Weise, Matthias}, title = {Auswahl von Selektions- und Manipulationstechniken f{\"u}r Virtual Reality-Anwendungen}, doi = {10.25932/publishup-53458}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-534586}, school = {Universit{\"a}t Potsdam}, pages = {iii, 218}, year = {2021}, abstract = {Die stetige Weiterentwicklung von VR-Systemen bietet neue M{\"o}glichkeiten der Interaktion mit virtuellen Objekten im dreidimensionalen Raum, stellt Entwickelnde von VRAnwendungen aber auch vor neue Herausforderungen. Selektions- und Manipulationstechniken m{\"u}ssen unter Ber{\"u}cksichtigung des Anwendungsszenarios, der Zielgruppe und der zur Verf{\"u}gung stehenden Ein- und Ausgabeger{\"a}te ausgew{\"a}hlt werden. Diese Arbeit leistet einen Beitrag dazu, die Auswahl von passenden Interaktionstechniken zu unterst{\"u}tzen. Hierf{\"u}r wurde eine repr{\"a}sentative Menge von Selektions- und Manipulationstechniken untersucht und, unter Ber{\"u}cksichtigung existierender Klassifikationssysteme, eine Taxonomie entwickelt, die die Analyse der Techniken hinsichtlich interaktionsrelevanter Eigenschaften erm{\"o}glicht. Auf Basis dieser Taxonomie wurden Techniken ausgew{\"a}hlt, die in einer explorativen Studie verglichen wurden, um R{\"u}ckschl{\"u}sse auf die Dimensionen der Taxonomie zu ziehen und neue Indizien f{\"u}r Vor- und Nachteile der Techniken in spezifischen Anwendungsszenarien zu generieren. Die Ergebnisse der Arbeit m{\"u}nden in eine Webanwendung, die Entwickelnde von VR-Anwendungen gezielt dabei unterst{\"u}tzt, passende Selektions- und Manipulationstechniken f{\"u}r ein Anwendungsszenario auszuw{\"a}hlen, indem Techniken auf Basis der Taxonomie gefiltert und unter Verwendung der Resultate aus der Studie sortiert werden k{\"o}nnen.}, language = {de} } @phdthesis{Dreseler2022, author = {Dreseler, Markus}, title = {Automatic tiering for in-memory database systems}, doi = {10.25932/publishup-55825}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-558253}, school = {Universit{\"a}t Potsdam}, pages = {vii, 143}, year = {2022}, abstract = {A decade ago, it became feasible to store multi-terabyte databases in main memory. These in-memory databases (IMDBs) profit from DRAM's low latency and high throughput as well as from the removal of costly abstractions used in disk-based systems, such as the buffer cache. However, as the DRAM technology approaches physical limits, scaling these databases becomes difficult. Non-volatile memory (NVM) addresses this challenge. This new type of memory is persistent, has more capacity than DRAM (4x), and does not suffer from its density-inhibiting limitations. Yet, as NVM has a higher latency (5-15x) and a lower throughput (0.35x), it cannot fully replace DRAM. IMDBs thus need to navigate the trade-off between the two memory tiers. We present a solution to this optimization problem. Leveraging information about access frequencies and patterns, our solution utilizes NVM's additional capacity while minimizing the associated access costs. Unlike buffer cache-based implementations, our tiering abstraction does not add any costs when reading data from DRAM. As such, it can act as a drop-in replacement for existing IMDBs. Our contributions are as follows: (1) As the foundation for our research, we present Hyrise, an open-source, columnar IMDB that we re-engineered and re-wrote from scratch. Hyrise enables realistic end-to-end benchmarks of SQL workloads and offers query performance which is competitive with other research and commercial systems. At the same time, Hyrise is easy to understand and modify as repeatedly demonstrated by its uses in research and teaching. (2) We present a novel memory management framework for different memory and storage tiers. By encapsulating the allocation and access methods of these tiers, we enable existing data structures to be stored on different tiers with no modifications to their implementation. Besides DRAM and NVM, we also support and evaluate SSDs and have made provisions for upcoming technologies such as disaggregated memory. (3) To identify the parts of the data that can be moved to (s)lower tiers with little performance impact, we present a tracking method that identifies access skew both in the row and column dimensions and that detects patterns within consecutive accesses. Unlike existing methods that have substantial associated costs, our access counters exhibit no identifiable overhead in standard benchmarks despite their increased accuracy. (4) Finally, we introduce a tiering algorithm that optimizes the data placement for a given memory budget. In the TPC-H benchmark, this allows us to move 90\% of the data to NVM while the throughput is reduced by only 10.8\% and the query latency is increased by 11.6\%. With this, we outperform approaches that ignore the workload's access skew and access patterns and increase the query latency by 20\% or more. Individually, our contributions provide novel approaches to current challenges in systems engineering and database research. Combining them allows IMDBs to scale past the limits of DRAM while continuing to profit from the benefits of in-memory computing.}, language = {en} } @phdthesis{SchulzHanke2023, author = {Schulz-Hanke, Christian}, title = {BCH Codes mit kombinierter Korrektur und Erkennung}, doi = {10.25932/publishup-61794}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-617943}, school = {Universit{\"a}t Potsdam}, pages = {ii, 191}, year = {2023}, abstract = {BCH Codes mit kombinierter Korrektur und Erkennung In dieser Arbeit wird auf Grundlage des BCH Codes untersucht, wie eine Fehlerkorrektur mit einer Erkennung h{\"o}herer Fehleranzahlen kombiniert werden kann. Mit dem Verfahren der 1-Bit Korrektur mit zus{\"a}tzlicher Erkennung h{\"o}herer Fehler wurde ein Ansatz entwickelt, welcher die Erkennung zus{\"a}tzlicher Fehler durch das parallele L{\"o}sen einfacher Gleichungen der Form s_x = s_1^x durchf{\"u}hrt. Die Anzahl dieser Gleichungen ist linear zu der Anzahl der zu {\"u}berpr{\"u}fenden h{\"o}heren Fehler. In dieser Arbeit wurde zus{\"a}tzlich f{\"u}r bis zu 4-Bit Korrekturen mit zus{\"a}tzlicher Erkennung h{\"o}herer Fehler ein weiterer allgemeiner Ansatz vorgestellt. Dabei werden parallel f{\"u}r alle korrigierbaren Fehleranzahlen spekulative Fehlerkorrekturen durchgef{\"u}hrt. Aus den bestimmten Fehlerstellen werden spekulative Syndromkomponenten erzeugt, durch welche die Fehlerstellen best{\"a}tigt und h{\"o}here erkennbare Fehleranzahlen ausgeschlossen werden k{\"o}nnen. Die vorgestellten Ans{\"a}tze unterscheiden sich von dem in entwickelten Ansatz, bei welchem die Anzahl der Fehlerstellen durch die Berechnung von Determinanten in absteigender Reihenfolge berechnet wird, bis die erste Determinante 0 bildet. Bei dem bekannten Verfahren ist durch die Berechnung der Determinanten eine faktorielle Anzahl an Berechnungen in Relation zu der Anzahl zu {\"u}berpr{\"u}fender Fehler durchzuf{\"u}hren. Im Vergleich zu dem bekannten sequentiellen Verfahrens nach Berlekamp Massey besitzen die Berechnungen im vorgestellten Ansatz simple Gleichungen und k{\"o}nnen parallel durchgef{\"u}hrt werden.Bei dem bekannten Verfahren zur parallelen Korrektur von 4-Bit Fehlern ist eine Gleichung vierten Grades im GF(2^m) zu l{\"o}sen. Dies erfolgt, indem eine Hilfsgleichung dritten Grades und vier Gleichungen zweiten Grades parallel gel{\"o}st werden. In der vorliegenden Arbeit wurde gezeigt, dass sich eine Gleichung zweiten Grades einsparen l{\"a}sst, wodurch sich eine Vereinfachung der Hardware bei einer parallelen Realisierung der 4-Bit Korrektur ergibt. Die erzielten Ergebnisse wurden durch umfangreiche Simulationen in Software und Hardwareimplementierungen {\"u}berpr{\"u}ft.}, language = {de} } @phdthesis{Weidlich2011, author = {Weidlich, Matthias}, title = {Behavioural profiles : a relational approach to behaviour consistency}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-55590}, school = {Universit{\"a}t Potsdam}, year = {2011}, abstract = {Business Process Management (BPM) emerged as a means to control, analyse, and optimise business operations. Conceptual models are of central importance for BPM. Most prominently, process models define the behaviour that is performed to achieve a business value. In essence, a process model is a mapping of properties of the original business process to the model, created for a purpose. Different modelling purposes, therefore, result in different models of a business process. Against this background, the misalignment of process models often observed in the field of BPM is no surprise. Even if the same business scenario is considered, models created for strategic decision making differ in content significantly from models created for process automation. Despite their differences, process models that refer to the same business process should be consistent, i.e., free of contradictions. Apparently, there is a trade-off between strictness of a notion of consistency and appropriateness of process models serving different purposes. Existing work on consistency analysis builds upon behaviour equivalences and hierarchical refinements between process models. Hence, these approaches are computationally hard and do not offer the flexibility to gradually relax consistency requirements towards a certain setting. This thesis presents a framework for the analysis of behaviour consistency that takes a fundamentally different approach. As a first step, an alignment between corresponding elements of related process models is constructed. Then, this thesis conducts behavioural analysis grounded on a relational abstraction of the behaviour of a process model, its behavioural profile. Different variants of these profiles are proposed, along with efficient computation techniques for a broad class of process models. Using behavioural profiles, consistency of an alignment between process models is judged by different notions and measures. The consistency measures are also adjusted to assess conformance of process logs that capture the observed execution of a process. Further, this thesis proposes various complementary techniques to support consistency management. It elaborates on how to implement consistent change propagation between process models, addresses the exploration of behavioural commonalities and differences, and proposes a model synthesis for behavioural profiles.}, language = {en} } @phdthesis{Ziehe2005, author = {Ziehe, Andreas}, title = {Blind source separation based on joint diagonalization of matrices with applications in biomedical signal processing}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-5694}, school = {Universit{\"a}t Potsdam}, year = {2005}, abstract = {This thesis is concerned with the solution of the blind source separation problem (BSS). The BSS problem occurs frequently in various scientific and technical applications. In essence, it consists in separating meaningful underlying components out of a mixture of a multitude of superimposed signals. In the recent research literature there are two related approaches to the BSS problem: The first is known as Independent Component Analysis (ICA), where the goal is to transform the data such that the components become as independent as possible. The second is based on the notion of diagonality of certain characteristic matrices derived from the data. Here the goal is to transform the matrices such that they become as diagonal as possible. In this thesis we study the latter method of approximate joint diagonalization (AJD) to achieve a solution of the BSS problem. After an introduction to the general setting, the thesis provides an overview on particular choices for the set of target matrices that can be used for BSS by joint diagonalization. As the main contribution of the thesis, new algorithms for approximate joint diagonalization of several matrices with non-orthogonal transformations are developed. These newly developed algorithms will be tested on synthetic benchmark datasets and compared to other previous diagonalization algorithms. Applications of the BSS methods to biomedical signal processing are discussed and exemplified with real-life data sets of multi-channel biomagnetic recordings.}, subject = {Signaltrennung}, language = {en} } @phdthesis{Tinnefeld2014, author = {Tinnefeld, Christian}, title = {Building a columnar database on shared main memory-based storage}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-72063}, school = {Universit{\"a}t Potsdam}, pages = {175}, year = {2014}, abstract = {In the field of disk-based parallel database management systems exists a great variety of solutions based on a shared-storage or a shared-nothing architecture. In contrast, main memory-based parallel database management systems are dominated solely by the shared-nothing approach as it preserves the in-memory performance advantage by processing data locally on each server. We argue that this unilateral development is going to cease due to the combination of the following three trends: a) Nowadays network technology features remote direct memory access (RDMA) and narrows the performance gap between accessing main memory inside a server and of a remote server to and even below a single order of magnitude. b) Modern storage systems scale gracefully, are elastic, and provide high-availability. c) A modern storage system such as Stanford's RAMCloud even keeps all data resident in main memory. Exploiting these characteristics in the context of a main-memory parallel database management system is desirable. The advent of RDMA-enabled network technology makes the creation of a parallel main memory DBMS based on a shared-storage approach feasible. This thesis describes building a columnar database on shared main memory-based storage. The thesis discusses the resulting architecture (Part I), the implications on query processing (Part II), and presents an evaluation of the resulting solution in terms of performance, high-availability, and elasticity (Part III). In our architecture, we use Stanford's RAMCloud as shared-storage, and the self-designed and developed in-memory AnalyticsDB as relational query processor on top. AnalyticsDB encapsulates data access and operator execution via an interface which allows seamless switching between local and remote main memory, while RAMCloud provides not only storage capacity, but also processing power. Combining both aspects allows pushing-down the execution of database operators into the storage system. We describe how the columnar data processed by AnalyticsDB is mapped to RAMCloud's key-value data model and how the performance advantages of columnar data storage can be preserved. The combination of fast network technology and the possibility to execute database operators in the storage system opens the discussion for site selection. We construct a system model that allows the estimation of operator execution costs in terms of network transfer, data processed in memory, and wall time. This can be used for database operators that work on one relation at a time - such as a scan or materialize operation - to discuss the site selection problem (data pull vs. operator push). Since a database query translates to the execution of several database operators, it is possible that the optimal site selection varies per operator. For the execution of a database operator that works on two (or more) relations at a time, such as a join, the system model is enriched by additional factors such as the chosen algorithm (e.g. Grace- vs. Distributed Block Nested Loop Join vs. Cyclo-Join), the data partitioning of the respective relations, and their overlapping as well as the allowed resource allocation. We present an evaluation on a cluster with 60 nodes where all nodes are connected via RDMA-enabled network equipment. We show that query processing performance is about 2.4x slower if everything is done via the data pull operator execution strategy (i.e. RAMCloud is being used only for data access) and about 27\% slower if operator execution is also supported inside RAMCloud (in comparison to operating only on main memory inside a server without any network communication at all). The fast-crash recovery feature of RAMCloud can be leveraged to provide high-availability, e.g. a server crash during query execution only delays the query response for about one second. Our solution is elastic in a way that it can adapt to changing workloads a) within seconds, b) without interruption of the ongoing query processing, and c) without manual intervention.}, language = {en} } @phdthesis{Steinert2014, author = {Steinert, Bastian}, title = {Built-in recovery support for explorative programming}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-71305}, school = {Universit{\"a}t Potsdam}, year = {2014}, abstract = {This work introduces concepts and corresponding tool support to enable a complementary approach in dealing with recovery. Programmers need to recover a development state, or a part thereof, when previously made changes reveal undesired implications. However, when the need arises suddenly and unexpectedly, recovery often involves expensive and tedious work. To avoid tedious work, literature recommends keeping away from unexpected recovery demands by following a structured and disciplined approach, which consists of the application of various best practices including working only on one thing at a time, performing small steps, as well as making proper use of versioning and testing tools. However, the attempt to avoid unexpected recovery is both time-consuming and error-prone. On the one hand, it requires disproportionate effort to minimize the risk of unexpected situations. On the other hand, applying recommended practices selectively, which saves time, can hardly avoid recovery. In addition, the constant need for foresight and self-control has unfavorable implications. It is exhaustive and impedes creative problem solving. This work proposes to make recovery fast and easy and introduces corresponding support called CoExist. Such dedicated support turns situations of unanticipated recovery from tedious experiences into pleasant ones. It makes recovery fast and easy to accomplish, even if explicit commits are unavailable or tests have been ignored for some time. When mistakes and unexpected insights are no longer associated with tedious corrective actions, programmers are encouraged to change source code as a means to reason about it, as opposed to making changes only after structuring and evaluating them mentally. This work further reports on an implementation of the proposed tool support in the Squeak/Smalltalk development environment. The development of the tools has been accompanied by regular performance and usability tests. In addition, this work investigates whether the proposed tools affect programmers' performance. In a controlled lab study, 22 participants improved the design of two different applications. Using a repeated measurement setup, the study examined the effect of providing CoExist on programming performance. The result of analyzing 88 hours of programming suggests that built-in recovery support as provided with CoExist positively has a positive effect on programming performance in explorative programming tasks.}, language = {en} } @phdthesis{EidSabbagh2015, author = {Eid-Sabbagh, Rami-Habib}, title = {Business process architectures}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-79719}, school = {Universit{\"a}t Potsdam}, pages = {xvii, 256}, year = {2015}, abstract = {Business Process Management has become an integral part of modern organizations in the private and public sector for improving their operations. In the course of Business Process Management efforts, companies and organizations assemble large process model repositories with many hundreds and thousands of business process models bearing a large amount of information. With the advent of large business process model collections, new challenges arise as structuring and managing a large amount of process models, their maintenance, and their quality assurance. This is covered by business process architectures that have been introduced for organizing and structuring business process model collections. A variety of business process architecture approaches have been proposed that align business processes along aspects of interest, e. g., goals, functions, or objects. They provide a high level categorization of single processes ignoring their interdependencies, thus hiding valuable information. The production of goods or the delivery of services are often realized by a complex system of interdependent business processes. Hence, taking a holistic view at business processes interdependencies becomes a major necessity to organize, analyze, and assess the impact of their re-/design. Visualizing business processes interdependencies reveals hidden and implicit information from a process model collection. In this thesis, we present a novel Business Process Architecture approach for representing and analyzing business process interdependencies on an abstract level. We propose a formal definition of our Business Process Architecture approach, design correctness criteria, and develop analysis techniques for assessing their quality. We describe a methodology for applying our Business Process Architecture approach top-down and bottom-up. This includes techniques for Business Process Architecture extraction from, and decomposition to process models while considering consistency issues between business process architecture and process model level. Using our extraction algorithm, we present a novel technique to identify and visualize data interdependencies in Business Process Data Architectures. Our Business Process Architecture approach provides business process experts,managers, and other users of a process model collection with an overview that allows reasoning about a large set of process models, understanding, and analyzing their interdependencies in a facilitated way. In this regard we evaluated our Business Process Architecture approach in an experiment and provide implementations of selected techniques.}, language = {en} } @phdthesis{Smirnov2011, author = {Smirnov, Sergey}, title = {Business process model abstraction}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-60258}, school = {Universit{\"a}t Potsdam}, year = {2011}, abstract = {Business process models are used within a range of organizational initiatives, where every stakeholder has a unique perspective on a process and demands the respective model. As a consequence, multiple process models capturing the very same business process coexist. Keeping such models in sync is a challenge within an ever changing business environment: once a process is changed, all its models have to be updated. Due to a large number of models and their complex relations, model maintenance becomes error-prone and expensive. Against this background, business process model abstraction emerged as an operation reducing the number of stored process models and facilitating model management. Business process model abstraction is an operation preserving essential process properties and leaving out insignificant details in order to retain information relevant for a particular purpose. Process model abstraction has been addressed by several researchers. The focus of their studies has been on particular use cases and model transformations supporting these use cases. This thesis systematically approaches the problem of business process model abstraction shaping the outcome into a framework. We investigate the current industry demand in abstraction summarizing it in a catalog of business process model abstraction use cases. The thesis focuses on one prominent use case where the user demands a model with coarse-grained activities and overall process ordering constraints. We develop model transformations that support this use case starting with the transformations based on process model structure analysis. Further, abstraction methods considering the semantics of process model elements are investigated. First, we suggest how semantically related activities can be discovered in process models-a barely researched challenge. The thesis validates the designed abstraction methods against sets of industrial process models and discusses the method implementation aspects. Second, we develop a novel model transformation, which combined with the related activity discovery allows flexible non-hierarchical abstraction. In this way this thesis advocates novel model transformations that facilitate business process model management and provides the foundations for innovative tool support.}, language = {en} } @phdthesis{Schnjakin2014, author = {Schnjakin, Maxim}, title = {Cloud-RAID}, pages = {137}, year = {2014}, language = {de} } @phdthesis{Schneider2019, author = {Schneider, Jan Niklas}, title = {Computational approaches for emotion research}, doi = {10.25932/publishup-45927}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-459275}, school = {Universit{\"a}t Potsdam}, pages = {xv, 145}, year = {2019}, abstract = {Emotionen sind ein zentrales Element menschlichen Erlebens und spielen eine wichtige Rolle bei der Entscheidungsfindung. Diese Dissertation identifiziert drei methodische Probleme der aktuellen Emotionsforschung und zeigt auf, wie diese mittels computergest{\"u}tzter Methoden gel{\"o}st werden k{\"o}nnen. Dieser Ansatz wird in drei Forschungsprojekten demonstriert, die die Entwicklung solcher Methoden sowie deren Anwendung auf konkrete Forschungsfragen beschreiben. Das erste Projekt beschreibt ein Paradigma welches es erm{\"o}glicht, die subjektive und objektive Schwierigkeit der Emotionswahrnehmung zu messen. Dar{\"u}ber hinaus erm{\"o}glicht es die Verwendung einer beliebigen Anzahl von Emotionskategorien im Vergleich zu den {\"u}blichen sechs Kategorien der Basisemotionen. Die Ergebnisse deuten auf eine Zunahme der Schwierigkeiten bei der Wahrnehmung von Emotionen mit zunehmendem Alter der Darsteller hin und liefern Hinweise darauf, dass junge Erwachsene, {\"a}ltere Menschen und M{\"a}nner ihre Schwierigkeit bei der Wahrnehmung von Emotionen untersch{\"a}tzen. Weitere Analysen zeigten eine geringe Relevanz personenbezogener Variablen und deuteten darauf hin, dass die Schwierigkeit der Emotionswahrnehmung vornehmlich durch die Auspr{\"a}gung der Wertigkeit des Ausdrucks bestimmt wird. Das zweite Projekt zeigt am Beispiel von Arousal, einem etablierten, aber vagen Konstrukt der Emotionsforschung, wie Face-Tracking-Daten dazu genutzt werden k{\"o}nnen solche Konstrukte zu sch{\"a}rfen. Es beschreibt, wie aus Face-Tracking-Daten Maße f{\"u}r die Entfernung, Geschwindigkeit und Beschleunigung von Gesichtsausdr{\"u}cken berechnet werden k{\"o}nnen. Das Projekt untersuchte wie diesen Maße mit der Arousal-Wahrnehmung in Menschen mit und ohne Autismus zusammenh{\"a}ngen. Der Abstand zum Neutralgesicht war pr{\"a}diktiv f{\"u}r die Arousal-Bewertungen in beiden Gruppen. Die Ergebnisse deuten auf eine qualitativ {\"a}hnliche Wahrnehmung von Arousal f{\"u}r Menschen mit und ohne Autismus hin. Im dritten Projekt stellen wir die Partial-Least-Squares-Analyse als allgemeine Methode vor, um eine optimale Repr{\"a}sentation zur Verkn{\"u}pfung zweier hochdimensionale Datens{\"a}tze zu finden. Das Projekt demonstriert die Anwendbarkeit dieser Methode in der Emotionsforschung anhand der Frage nach Unterschieden in der Emotionswahrnehmung zwischen M{\"a}nnern und Frauen. Wir konnten zeigen, dass die emotionale Wahrnehmung von Frauen systematisch mehr Varianz der Gesichtsausdr{\"u}cke erfasst und dass signifikante Unterschiede in der Art und Weise bestehen, wie Frauen und M{\"a}nner einige Gesichtsausdr{\"u}cke wahrnehmen. Diese konnten wir als dynamische Gesichtsausdr{\"u}cke visualisieren. Um die Anwendung der entwickelten Methode f{\"u}r die Forschungsgemeinschaft zu erleichtern, wurde ein Software-Paket f{\"u}r die Statistikumgebung R geschrieben. Zudem wurde eine Website entwickelt (thisemotiondoesnotexist.com), die es Besuchern erlaubt, ein Partial-Least-Squares-Modell von Emotionsbewertungen und Face-Tracking-Daten interaktiv zu erkunden, um die entwickelte Methode zu verbreiten und ihren Nutzen f{\"u}r die Emotionsforschung zu illustrieren.}, language = {en} } @phdthesis{Richter2018, author = {Richter, Rico}, title = {Concepts and techniques for processing and rendering of massive 3D point clouds}, doi = {10.25932/publishup-42330}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-423304}, school = {Universit{\"a}t Potsdam}, pages = {v, 131}, year = {2018}, abstract = {Remote sensing technology, such as airborne, mobile, or terrestrial laser scanning, and photogrammetric techniques, are fundamental approaches for efficient, automatic creation of digital representations of spatial environments. For example, they allow us to generate 3D point clouds of landscapes, cities, infrastructure networks, and sites. As essential and universal category of geodata, 3D point clouds are used and processed by a growing number of applications, services, and systems such as in the domains of urban planning, landscape architecture, environmental monitoring, disaster management, virtual geographic environments as well as for spatial analysis and simulation. While the acquisition processes for 3D point clouds become more and more reliable and widely-used, applications and systems are faced with more and more 3D point cloud data. In addition, 3D point clouds, by their very nature, are raw data, i.e., they do not contain any structural or semantics information. Many processing strategies common to GIS such as deriving polygon-based 3D models generally do not scale for billions of points. GIS typically reduce data density and precision of 3D point clouds to cope with the sheer amount of data, but that results in a significant loss of valuable information at the same time. This thesis proposes concepts and techniques designed to efficiently store and process massive 3D point clouds. To this end, object-class segmentation approaches are presented to attribute semantics to 3D point clouds, used, for example, to identify building, vegetation, and ground structures and, thus, to enable processing, analyzing, and visualizing 3D point clouds in a more effective and efficient way. Similarly, change detection and updating strategies for 3D point clouds are introduced that allow for reducing storage requirements and incrementally updating 3D point cloud databases. In addition, this thesis presents out-of-core, real-time rendering techniques used to interactively explore 3D point clouds and related analysis results. All techniques have been implemented based on specialized spatial data structures, out-of-core algorithms, and GPU-based processing schemas to cope with massive 3D point clouds having billions of points. All proposed techniques have been evaluated and demonstrated their applicability to the field of geospatial applications and systems, in particular for tasks such as classification, processing, and visualization. Case studies for 3D point clouds of entire cities with up to 80 billion points show that the presented approaches open up new ways to manage and apply large-scale, dense, and time-variant 3D point clouds as required by a rapidly growing number of applications and systems.}, language = {en} } @phdthesis{Steinmetz2013, author = {Steinmetz, Nadine}, title = {Context-aware semantic analysis of video metadata}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-70551}, school = {Universit{\"a}t Potsdam}, year = {2013}, abstract = {Im Vergleich zu einer stichwortbasierten Suche erm{\"o}glicht die semantische Suche ein pr{\"a}ziseres und anspruchsvolleres Durchsuchen von (Web)-Dokumenten, weil durch die explizite Semantik Mehrdeutigkeiten von nat{\"u}rlicher Sprache vermieden und semantische Beziehungen in das Suchergebnis einbezogen werden k{\"o}nnen. Eine semantische, Entit{\"a}ten-basierte Suche geht von einer Anfrage mit festgelegter Bedeutung aus und liefert nur Dokumente, die mit dieser Entit{\"a}t annotiert sind als Suchergebnis. Die wichtigste Voraussetzung f{\"u}r eine Entit{\"a}ten-zentrierte Suche stellt die Annotation der Dokumente im Archiv mit Entit{\"a}ten und Kategorien dar. Textuelle Informationen werden analysiert und mit den entsprechenden Entit{\"a}ten und Kategorien versehen, um den Inhalt semantisch erschließen zu k{\"o}nnen. Eine manuelle Annotation erfordert Dom{\"a}nenwissen und ist sehr zeitaufwendig. Die semantische Annotation von Videodokumenten erfordert besondere Aufmerksamkeit, da inhaltsbasierte Metadaten von Videos aus verschiedenen Quellen stammen, verschiedene Eigenschaften und Zuverl{\"a}ssigkeiten besitzen und daher nicht wie Fließtext behandelt werden k{\"o}nnen. Die vorliegende Arbeit stellt einen semantischen Analyseprozess f{\"u}r Video-Metadaten vor. Die Eigenschaften der verschiedenen Metadatentypen werden analysiert und ein Konfidenzwert ermittelt. Dieser Wert spiegelt die Korrektheit und die wahrscheinliche Mehrdeutigkeit eines Metadatums wieder. Beginnend mit dem Metadatum mit dem h{\"o}chsten Konfidenzwert wird der Analyseprozess innerhalb eines Kontexts in absteigender Reihenfolge des Konfidenzwerts durchgef{\"u}hrt. Die bereits analysierten Metadaten dienen als Referenzpunkt f{\"u}r die weiteren Analysen. So kann eine m{\"o}glichst korrekte Analyse der heterogen strukturierten Daten eines Kontexts sichergestellt werden. Am Ende der Analyse eines Metadatums wird die f{\"u}r den Kontext relevanteste Entit{\"a}t aus einer Liste von Kandidaten identifiziert - das Metadatum wird disambiguiert. Hierf{\"u}r wurden verschiedene Disambiguierungsalgorithmen entwickelt, die Beschreibungstexte und semantische Beziehungen der Entit{\"a}tenkandidaten zum gegebenen Kontext in Betracht ziehen. Der Kontext f{\"u}r die Disambiguierung wird f{\"u}r jedes Metadatum anhand der Eigenschaften und Konfidenzwerte zusammengestellt. Der vorgestellte Analyseprozess ist an zwei Hypothesen angelehnt: Um die Analyseergebnisse verbessern zu k{\"o}nnen, sollten die Metadaten eines Kontexts in absteigender Reihenfolge ihres Konfidenzwertes verarbeitet werden und die Kontextgrenzen von Videometadaten sollten durch Segmentgrenzen definiert werden, um m{\"o}glichst Kontexte mit koh{\"a}rentem Inhalt zu erhalten. Durch ausf{\"u}hrliche Evaluationen konnten die gestellten Hypothesen best{\"a}tigt werden. Der Analyseprozess wurden gegen mehrere State-of-the-Art Methoden verglichen und erzielt verbesserte Ergebnisse in Bezug auf Recall und Precision, besonders f{\"u}r Metadaten, die aus weniger zuverl{\"a}ssigen Quellen stammen. Der Analyseprozess ist Teil eines Videoanalyse-Frameworks und wurde bereits erfolgreich in verschiedenen Projekten eingesetzt.}, language = {en} }