@phdthesis{AlSaffar2016, author = {Al-Saffar, Loay Talib Ahmed}, title = {Analysing prerequisites, expectations, apprehensions, and attitudes of university students studying Computer science}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-98437}, school = {Universit{\"a}t Potsdam}, pages = {xii, 131}, year = {2016}, abstract = {The main objective of this dissertation is to analyse prerequisites, expectations, apprehensions, and attitudes of students studying computer science, who are willing to gain a bachelor degree. The research will also investigate in the students' learning style according to the Felder-Silverman model. These investigations fall in the attempt to make an impact on reducing the "dropout"/shrinkage rate among students, and to suggest a better learning environment. The first investigation starts with a survey that has been made at the computer science department at the University of Baghdad to investigate the attitudes of computer science students in an environment dominated by women, showing the differences in attitudes between male and female students in different study years. Students are accepted to university studies via a centrally controlled admission procedure depending mainly on their final score at school. This leads to a high percentage of students studying subjects they do not want. Our analysis shows that 75\% of the female students do not regret studying computer science although it was not their first choice. And according to statistics over previous years, women manage to succeed in their study and often graduate on top of their class. We finish with a comparison of attitudes between the freshman students of two different cultures and two different university enrolment procedures (University of Baghdad, in Iraq, and the University of Potsdam, in Germany) both with opposite gender majority. The second step of investigation took place at the department of computer science at the University of Potsdam in Germany and analyzes the learning styles of students studying the three major fields of study offered by the department (computer science, business informatics, and computer science teaching). Investigating the differences in learning styles between the students of those study fields who usually take some joint courses is important to be aware of which changes are necessary to be adopted in the teaching methods to address those different students. It was a two stage study using two questionnaires; the main one is based on the Index of Learning Styles Questionnaire of B. A. Solomon and R. M. Felder, and the second questionnaire was an investigation on the students' attitudes towards the findings of their personal first questionnaire. Our analysis shows differences in the preferences of learning style between male and female students of the different study fields, as well as differences between students with the different specialties (computer science, business informatics, and computer science teaching). The third investigation looks closely into the difficulties, issues, apprehensions and expectations of freshman students studying computer science. The study took place at the computer science department at the University of Potsdam with a volunteer sample of students. The goal is to determine and discuss the difficulties and issues that they are facing in their study that may lead them to think in dropping-out, changing the study field, or changing the university. The research continued with the same sample of students (with business informatics students being the majority) through more than three semesters. Difficulties and issues during the study were documented, as well as students' attitudes, apprehensions, and expectations. Some of the professors and lecturers opinions and solutions to some students' problems were also documented. Many participants had apprehensions and difficulties, especially towards informatics subjects. Some business informatics participants began to think of changing the university, in particular when they reached their third semester, others thought about changing their field of study. Till the end of this research, most of the participants continued in their studies (the study they have started with or the new study they have changed to) without leaving the higher education system.}, language = {en} } @phdthesis{Wolf2021, author = {Wolf, Johannes}, title = {Analysis and visualization of transport infrastructure based on large-scale geospatial mobile mapping data}, doi = {10.25932/publishup-53612}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-536129}, school = {Universit{\"a}t Potsdam}, pages = {vi, 121}, year = {2021}, abstract = {3D point clouds are a universal and discrete digital representation of three-dimensional objects and environments. For geospatial applications, 3D point clouds have become a fundamental type of raw data acquired and generated using various methods and techniques. In particular, 3D point clouds serve as raw data for creating digital twins of the built environment. This thesis concentrates on the research and development of concepts, methods, and techniques for preprocessing, semantically enriching, analyzing, and visualizing 3D point clouds for applications around transport infrastructure. It introduces a collection of preprocessing techniques that aim to harmonize raw 3D point cloud data, such as point density reduction and scan profile detection. Metrics such as, e.g., local density, verticality, and planarity are calculated for later use. One of the key contributions tackles the problem of analyzing and deriving semantic information in 3D point clouds. Three different approaches are investigated: a geometric analysis, a machine learning approach operating on synthetically generated 2D images, and a machine learning approach operating on 3D point clouds without intermediate representation. In the first application case, 2D image classification is applied and evaluated for mobile mapping data focusing on road networks to derive road marking vector data. The second application case investigates how 3D point clouds can be merged with ground-penetrating radar data for a combined visualization and to automatically identify atypical areas in the data. For example, the approach detects pavement regions with developing potholes. The third application case explores the combination of a 3D environment based on 3D point clouds with panoramic imagery to improve visual representation and the detection of 3D objects such as traffic signs. The presented methods were implemented and tested based on software frameworks for 3D point clouds and 3D visualization. In particular, modules for metric computation, classification procedures, and visualization techniques were integrated into a modular pipeline-based C++ research framework for geospatial data processing, extended by Python machine learning scripts. All visualization and analysis techniques scale to large real-world datasets such as road networks of entire cities or railroad networks. The thesis shows that some use cases allow taking advantage of established image vision methods to analyze images rendered from mobile mapping data efficiently. The two presented semantic classification methods working directly on 3D point clouds are use case independent and show similar overall accuracy when compared to each other. While the geometry-based method requires less computation time, the machine learning-based method supports arbitrary semantic classes but requires training the network with ground truth data. Both methods can be used in combination to gradually build this ground truth with manual corrections via a respective annotation tool. This thesis contributes results for IT system engineering of applications, systems, and services that require spatial digital twins of transport infrastructure such as road networks and railroad networks based on 3D point clouds as raw data. It demonstrates the feasibility of fully automated data flows that map captured 3D point clouds to semantically classified models. This provides a key component for seamlessly integrated spatial digital twins in IT solutions that require up-to-date, object-based, and semantically enriched information about the built environment.}, language = {en} } @phdthesis{Floeter2005, author = {Fl{\"o}ter, Andr{\´e}}, title = {Analyzing biological expression data based on decision tree induction}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-6416}, school = {Universit{\"a}t Potsdam}, year = {2005}, abstract = {Modern biological analysis techniques supply scientists with various forms of data. One category of such data are the so called "expression data". These data indicate the quantities of biochemical compounds present in tissue samples. Recently, expression data can be generated at a high speed. This leads in turn to amounts of data no longer analysable by classical statistical techniques. Systems biology is the new field that focuses on the modelling of this information. At present, various methods are used for this purpose. One superordinate class of these meth­ods is machine learning. Methods of this kind had, until recently, predominantly been used for classification and prediction tasks. This neglected a powerful secondary benefit: the ability to induce interpretable models. Obtaining such models from data has become a key issue within Systems biology. Numerous approaches have been proposed and intensively discussed. This thesis focuses on the examination and exploitation of one basic technique: decision trees. The concept of comparing sets of decision trees is developed. This method offers the pos­sibility of identifying significant thresholds in continuous or discrete valued attributes through their corresponding set of decision trees. Finding significant thresholds in attributes is a means of identifying states in living organisms. Knowing about states is an invaluable clue to the un­derstanding of dynamic processes in organisms. Applied to metabolite concentration data, the proposed method was able to identify states which were not found with conventional techniques for threshold extraction. A second approach exploits the structure of sets of decision trees for the discovery of com­binatorial dependencies between attributes. Previous work on this issue has focused either on expensive computational methods or the interpretation of single decision trees ­ a very limited exploitation of the data. This has led to incomplete or unstable results. That is why a new method is developed that uses sets of decision trees to overcome these limitations. Both the introduced methods are available as software tools. They can be applied consecu­tively or separately. That way they make up a package of analytical tools that usefully supplement existing methods. By means of these tools, the newly introduced methods were able to confirm existing knowl­edge and to suggest interesting and new relationships between metabolites.}, subject = {Molekulare Bioinformatik}, language = {en} } @phdthesis{Scholz2006, author = {Scholz, Matthias}, title = {Approaches to analyse and interpret biological profile data}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-7839}, school = {Universit{\"a}t Potsdam}, year = {2006}, abstract = {Advances in biotechnologies rapidly increase the number of molecules of a cell which can be observed simultaneously. This includes expression levels of thousands or ten-thousands of genes as well as concentration levels of metabolites or proteins. Such Profile data, observed at different times or at different experimental conditions (e.g., heat or dry stress), show how the biological experiment is reflected on the molecular level. This information is helpful to understand the molecular behaviour and to identify molecules or combination of molecules that characterise specific biological condition (e.g., disease). This work shows the potentials of component extraction algorithms to identify the major factors which influenced the observed data. This can be the expected experimental factors such as the time or temperature as well as unexpected factors such as technical artefacts or even unknown biological behaviour. Extracting components means to reduce the very high-dimensional data to a small set of new variables termed components. Each component is a combination of all original variables. The classical approach for that purpose is the principal component analysis (PCA). It is shown that, in contrast to PCA which maximises the variance only, modern approaches such as independent component analysis (ICA) are more suitable for analysing molecular data. The condition of independence between components of ICA fits more naturally our assumption of individual (independent) factors which influence the data. This higher potential of ICA is demonstrated by a crossing experiment of the model plant Arabidopsis thaliana (Thale Cress). The experimental factors could be well identified and, in addition, ICA could even detect a technical artefact. However, in continuously observations such as in time experiments, the data show, in general, a nonlinear distribution. To analyse such nonlinear data, a nonlinear extension of PCA is used. This nonlinear PCA (NLPCA) is based on a neural network algorithm. The algorithm is adapted to be applicable to incomplete molecular data sets. Thus, it provides also the ability to estimate the missing data. The potential of nonlinear PCA to identify nonlinear factors is demonstrated by a cold stress experiment of Arabidopsis thaliana. The results of component analysis can be used to build a molecular network model. Since it includes functional dependencies it is termed functional network. Applied to the cold stress data, it is shown that functional networks are appropriate to visualise biological processes and thereby reveals molecular dynamics.}, subject = {Bioinformatik}, language = {en} } @phdthesis{Becker2013, author = {Becker, Basil}, title = {Architectural modelling and verification of open service-oriented systems of systems}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-70158}, school = {Universit{\"a}t Potsdam}, year = {2013}, abstract = {Systems of Systems (SoS) have received a lot of attention recently. In this thesis we will focus on SoS that are built atop the techniques of Service-Oriented Architectures and thus combine the benefits and challenges of both paradigms. For this thesis we will understand SoS as ensembles of single autonomous systems that are integrated to a larger system, the SoS. The interesting fact about these systems is that the previously isolated systems are still maintained, improved and developed on their own. Structural dynamics is an issue in SoS, as at every point in time systems can join and leave the ensemble. This and the fact that the cooperation among the constituent systems is not necessarily observable means that we will consider these systems as open systems. Of course, the system has a clear boundary at each point in time, but this can only be identified by halting the complete SoS. However, halting a system of that size is practically impossible. Often SoS are combinations of software systems and physical systems. Hence a failure in the software system can have a serious physical impact what makes an SoS of this kind easily a safety-critical system. The contribution of this thesis is a modelling approach that extends OMG's SoaML and basically relies on collaborations and roles as an abstraction layer above the components. This will allow us to describe SoS at an architectural level. We will also give a formal semantics for our modelling approach which employs hybrid graph-transformation systems. The modelling approach is accompanied by a modular verification scheme that will be able to cope with the complexity constraints implied by the SoS' structural dynamics and size. Building such autonomous systems as SoS without evolution at the architectural level --- i. e. adding and removing of components and services --- is inadequate. Therefore our approach directly supports the modelling and verification of evolution.}, language = {en} } @phdthesis{Ishebabi2010, author = {Ishebabi, Harold}, title = {Architecture synthesis for adaptive multiprocessor systems on chip}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-41316}, school = {Universit{\"a}t Potsdam}, year = {2010}, abstract = {This thesis presents methods for automated synthesis of flexible chip multiprocessor systems from parallel programs targeted at FPGAs to exploit both task-level parallelism and architecture customization. Automated synthesis is necessitated by the complexity of the design space. A detailed description of the design space is provided in order to determine which parameters should be modeled to facilitate automated synthesis by optimizing a cost function, the emphasis being placed on inclusive modeling of parameters from application, architectural and physical subspaces, as well as their joint coverage in order to avoid pre-constraining the design space. Given a parallel program and a set of an IP library, the automated synthesis problem is to simultaneously (i) select processors (ii) map and schedule tasks to them, and (iii) select one or several networks for inter-task communications such that design constraints and optimization objectives are met. The research objective in this thesis is to find a suitable model for automated synthesis, and to evaluate methods of using the model for architectural optimizations. Our contributions are a holistic approach for the design of such systems, corresponding models to facilitate automated synthesis, evaluation of optimization methods using state of the art integer linear and answer set programming, as well as the development of synthesis heuristics to solve runtime challenges.}, language = {en} } @phdthesis{Wist2011, author = {Wist, Dominic}, title = {Attacking complexity in logic synthesis of asynchronous circuits}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-59706}, school = {Universit{\"a}t Potsdam}, year = {2011}, abstract = {Most of the microelectronic circuits fabricated today are synchronous, i.e. they are driven by one or several clock signals. Synchronous circuit design faces several fundamental challenges such as high-speed clock distribution, integration of multiple cores operating at different clock rates, reduction of power consumption and dealing with voltage, temperature, manufacturing and runtime variations. Asynchronous or clockless design plays a key role in alleviating these challenges, however the design and test of asynchronous circuits is much more difficult in comparison to their synchronous counterparts. A driving force for a widespread use of asynchronous technology is the availability of mature EDA (Electronic Design Automation) tools which provide an entire automated design flow starting from an HDL (Hardware Description Language) specification yielding the final circuit layout. Even though there was much progress in developing such EDA tools for asynchronous circuit design during the last two decades, the maturity level as well as the acceptance of them is still not comparable with tools for synchronous circuit design. In particular, logic synthesis (which implies the application of Boolean minimisation techniques) for the entire system's control path can significantly improve the efficiency of the resulting asynchronous implementation, e.g. in terms of chip area and performance. However, logic synthesis, in particular for asynchronous circuits, suffers from complexity problems. Signal Transitions Graphs (STGs) are labelled Petri nets which are a widely used to specify the interface behaviour of speed independent (SI) circuits - a robust subclass of asynchronous circuits. STG decomposition is a promising approach to tackle complexity problems like state space explosion in logic synthesis of SI circuits. The (structural) decomposition of STGs is guided by a partition of the output signals and generates a usually much smaller component STG for each partition member, i.e. a component STG with a much smaller state space than the initial specification. However, decomposition can result in component STGs that in isolation have so-called irreducible CSC conflicts (i.e. these components are not SI synthesisable anymore) even if the specification has none of them. A new approach is presented to avoid such conflicts by introducing internal communication between the components. So far, STG decompositions are guided by the finest output partitions, i.e. one output per component. However, this might not yield optimal circuit implementations. Efficient heuristics are presented to determine coarser partitions leading to improved circuits in terms of chip area. For the new algorithms correctness proofs are given and their implementations are incorporated into the decomposition tool DESIJ. The presented techniques are successfully applied to some benchmarks - including 'real-life' specifications arising in the context of control resynthesis - which delivered promising results.}, language = {en} } @phdthesis{Weise2021, author = {Weise, Matthias}, title = {Auswahl von Selektions- und Manipulationstechniken f{\"u}r Virtual Reality-Anwendungen}, doi = {10.25932/publishup-53458}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-534586}, school = {Universit{\"a}t Potsdam}, pages = {iii, 218}, year = {2021}, abstract = {Die stetige Weiterentwicklung von VR-Systemen bietet neue M{\"o}glichkeiten der Interaktion mit virtuellen Objekten im dreidimensionalen Raum, stellt Entwickelnde von VRAnwendungen aber auch vor neue Herausforderungen. Selektions- und Manipulationstechniken m{\"u}ssen unter Ber{\"u}cksichtigung des Anwendungsszenarios, der Zielgruppe und der zur Verf{\"u}gung stehenden Ein- und Ausgabeger{\"a}te ausgew{\"a}hlt werden. Diese Arbeit leistet einen Beitrag dazu, die Auswahl von passenden Interaktionstechniken zu unterst{\"u}tzen. Hierf{\"u}r wurde eine repr{\"a}sentative Menge von Selektions- und Manipulationstechniken untersucht und, unter Ber{\"u}cksichtigung existierender Klassifikationssysteme, eine Taxonomie entwickelt, die die Analyse der Techniken hinsichtlich interaktionsrelevanter Eigenschaften erm{\"o}glicht. Auf Basis dieser Taxonomie wurden Techniken ausgew{\"a}hlt, die in einer explorativen Studie verglichen wurden, um R{\"u}ckschl{\"u}sse auf die Dimensionen der Taxonomie zu ziehen und neue Indizien f{\"u}r Vor- und Nachteile der Techniken in spezifischen Anwendungsszenarien zu generieren. Die Ergebnisse der Arbeit m{\"u}nden in eine Webanwendung, die Entwickelnde von VR-Anwendungen gezielt dabei unterst{\"u}tzt, passende Selektions- und Manipulationstechniken f{\"u}r ein Anwendungsszenario auszuw{\"a}hlen, indem Techniken auf Basis der Taxonomie gefiltert und unter Verwendung der Resultate aus der Studie sortiert werden k{\"o}nnen.}, language = {de} } @phdthesis{Dreseler2022, author = {Dreseler, Markus}, title = {Automatic tiering for in-memory database systems}, doi = {10.25932/publishup-55825}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-558253}, school = {Universit{\"a}t Potsdam}, pages = {vii, 143}, year = {2022}, abstract = {A decade ago, it became feasible to store multi-terabyte databases in main memory. These in-memory databases (IMDBs) profit from DRAM's low latency and high throughput as well as from the removal of costly abstractions used in disk-based systems, such as the buffer cache. However, as the DRAM technology approaches physical limits, scaling these databases becomes difficult. Non-volatile memory (NVM) addresses this challenge. This new type of memory is persistent, has more capacity than DRAM (4x), and does not suffer from its density-inhibiting limitations. Yet, as NVM has a higher latency (5-15x) and a lower throughput (0.35x), it cannot fully replace DRAM. IMDBs thus need to navigate the trade-off between the two memory tiers. We present a solution to this optimization problem. Leveraging information about access frequencies and patterns, our solution utilizes NVM's additional capacity while minimizing the associated access costs. Unlike buffer cache-based implementations, our tiering abstraction does not add any costs when reading data from DRAM. As such, it can act as a drop-in replacement for existing IMDBs. Our contributions are as follows: (1) As the foundation for our research, we present Hyrise, an open-source, columnar IMDB that we re-engineered and re-wrote from scratch. Hyrise enables realistic end-to-end benchmarks of SQL workloads and offers query performance which is competitive with other research and commercial systems. At the same time, Hyrise is easy to understand and modify as repeatedly demonstrated by its uses in research and teaching. (2) We present a novel memory management framework for different memory and storage tiers. By encapsulating the allocation and access methods of these tiers, we enable existing data structures to be stored on different tiers with no modifications to their implementation. Besides DRAM and NVM, we also support and evaluate SSDs and have made provisions for upcoming technologies such as disaggregated memory. (3) To identify the parts of the data that can be moved to (s)lower tiers with little performance impact, we present a tracking method that identifies access skew both in the row and column dimensions and that detects patterns within consecutive accesses. Unlike existing methods that have substantial associated costs, our access counters exhibit no identifiable overhead in standard benchmarks despite their increased accuracy. (4) Finally, we introduce a tiering algorithm that optimizes the data placement for a given memory budget. In the TPC-H benchmark, this allows us to move 90\% of the data to NVM while the throughput is reduced by only 10.8\% and the query latency is increased by 11.6\%. With this, we outperform approaches that ignore the workload's access skew and access patterns and increase the query latency by 20\% or more. Individually, our contributions provide novel approaches to current challenges in systems engineering and database research. Combining them allows IMDBs to scale past the limits of DRAM while continuing to profit from the benefits of in-memory computing.}, language = {en} } @phdthesis{SchulzHanke2023, author = {Schulz-Hanke, Christian}, title = {BCH Codes mit kombinierter Korrektur und Erkennung}, doi = {10.25932/publishup-61794}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-617943}, school = {Universit{\"a}t Potsdam}, pages = {ii, 191}, year = {2023}, abstract = {BCH Codes mit kombinierter Korrektur und Erkennung In dieser Arbeit wird auf Grundlage des BCH Codes untersucht, wie eine Fehlerkorrektur mit einer Erkennung h{\"o}herer Fehleranzahlen kombiniert werden kann. Mit dem Verfahren der 1-Bit Korrektur mit zus{\"a}tzlicher Erkennung h{\"o}herer Fehler wurde ein Ansatz entwickelt, welcher die Erkennung zus{\"a}tzlicher Fehler durch das parallele L{\"o}sen einfacher Gleichungen der Form s_x = s_1^x durchf{\"u}hrt. Die Anzahl dieser Gleichungen ist linear zu der Anzahl der zu {\"u}berpr{\"u}fenden h{\"o}heren Fehler. In dieser Arbeit wurde zus{\"a}tzlich f{\"u}r bis zu 4-Bit Korrekturen mit zus{\"a}tzlicher Erkennung h{\"o}herer Fehler ein weiterer allgemeiner Ansatz vorgestellt. Dabei werden parallel f{\"u}r alle korrigierbaren Fehleranzahlen spekulative Fehlerkorrekturen durchgef{\"u}hrt. Aus den bestimmten Fehlerstellen werden spekulative Syndromkomponenten erzeugt, durch welche die Fehlerstellen best{\"a}tigt und h{\"o}here erkennbare Fehleranzahlen ausgeschlossen werden k{\"o}nnen. Die vorgestellten Ans{\"a}tze unterscheiden sich von dem in entwickelten Ansatz, bei welchem die Anzahl der Fehlerstellen durch die Berechnung von Determinanten in absteigender Reihenfolge berechnet wird, bis die erste Determinante 0 bildet. Bei dem bekannten Verfahren ist durch die Berechnung der Determinanten eine faktorielle Anzahl an Berechnungen in Relation zu der Anzahl zu {\"u}berpr{\"u}fender Fehler durchzuf{\"u}hren. Im Vergleich zu dem bekannten sequentiellen Verfahrens nach Berlekamp Massey besitzen die Berechnungen im vorgestellten Ansatz simple Gleichungen und k{\"o}nnen parallel durchgef{\"u}hrt werden.Bei dem bekannten Verfahren zur parallelen Korrektur von 4-Bit Fehlern ist eine Gleichung vierten Grades im GF(2^m) zu l{\"o}sen. Dies erfolgt, indem eine Hilfsgleichung dritten Grades und vier Gleichungen zweiten Grades parallel gel{\"o}st werden. In der vorliegenden Arbeit wurde gezeigt, dass sich eine Gleichung zweiten Grades einsparen l{\"a}sst, wodurch sich eine Vereinfachung der Hardware bei einer parallelen Realisierung der 4-Bit Korrektur ergibt. Die erzielten Ergebnisse wurden durch umfangreiche Simulationen in Software und Hardwareimplementierungen {\"u}berpr{\"u}ft.}, language = {de} }