@misc{Wallenta2014, author = {Wallenta, Daniel}, title = {A Lefschetz fixed point formula for elliptic quasicomplexes}, series = {Postprints der Universit{\"a}t Potsdam : Mathematisch Naturwissenschaftliche Reihe}, journal = {Postprints der Universit{\"a}t Potsdam : Mathematisch Naturwissenschaftliche Reihe}, number = {885}, issn = {1866-8372}, doi = {10.25932/publishup-43547}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-435471}, pages = {577 -- 587}, year = {2014}, abstract = {In a recent paper, the Lefschetz number for endomorphisms (modulo trace class operators) of sequences of trace class curvature was introduced. We show that this is a well defined, canonical extension of the classical Lefschetz number and establish the homotopy invariance of this number. Moreover, we apply the results to show that the Lefschetz fixed point formula holds for geometric quasiendomorphisms of elliptic quasicomplexes.}, language = {en} } @article{Respondek2014, author = {Respondek, Tobias}, title = {A workflow for computing potential areas for wind turbines}, series = {Process design for natural scientists: an agile model-driven approach}, journal = {Process design for natural scientists: an agile model-driven approach}, number = {500}, publisher = {Springer}, address = {Berlin}, isbn = {978-3-662-45005-5}, pages = {200 -- 215}, year = {2014}, abstract = {This paper describes the implementation of a workflow model for service-oriented computing of potential areas for wind turbines in jABC. By implementing a re-executable model the manual effort of a multi-criteria site analysis can be reduced. The aim is to determine the shift of typical geoprocessing tools of geographic information systems (GIS) from the desktop to the web. The analysis is based on a vector data set and mainly uses web services of the "Center for Spatial Information Science and Systems" (CSISS). This paper discusses effort, benefits and problems associated with the use of the web services.}, language = {en} } @phdthesis{Lindauer2014, author = {Lindauer, T. Marius}, title = {Algorithm selection, scheduling and configuration of Boolean constraint solvers}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-71260}, school = {Universit{\"a}t Potsdam}, pages = {ii, 130}, year = {2014}, abstract = {Boolean constraint solving technology has made tremendous progress over the last decade, leading to industrial-strength solvers, for example, in the areas of answer set programming (ASP), the constraint satisfaction problem (CSP), propositional satisfiability (SAT) and satisfiability of quantified Boolean formulas (QBF). However, in all these areas, there exist multiple solving strategies that work well on different applications; no strategy dominates all other strategies. Therefore, no individual solver shows robust state-of-the-art performance in all kinds of applications. Additionally, the question arises how to choose a well-performing solving strategy for a given application; this is a challenging question even for solver and domain experts. One way to address this issue is the use of portfolio solvers, that is, a set of different solvers or solver configurations. We present three new automatic portfolio methods: (i) automatic construction of parallel portfolio solvers (ACPP) via algorithm configuration,(ii) solving the \$NP\$-hard problem of finding effective algorithm schedules with Answer Set Programming (aspeed), and (iii) a flexible algorithm selection framework (claspfolio2) allowing for fair comparison of different selection approaches. All three methods show improved performance and robustness in comparison to individual solvers on heterogeneous instance sets from many different applications. Since parallel solvers are important to effectively solve hard problems on parallel computation systems (e.g., multi-core processors), we extend all three approaches to be effectively applicable in parallel settings. We conducted extensive experimental studies different instance sets from ASP, CSP, MAXSAT, Operation Research (OR), SAT and QBF that indicate an improvement in the state-of-the-art solving heterogeneous instance sets. Last but not least, from our experimental studies, we deduce practical advice regarding the question when to apply which of our methods.}, language = {en} } @book{MeinelSchnjakinMetzkeetal.2014, author = {Meinel, Christoph and Schnjakin, Maxim and Metzke, Tobias and Freitag, Markus}, title = {Anbieter von Cloud Speicherdiensten im {\"U}berblick}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-274-2}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-68780}, publisher = {Universit{\"a}t Potsdam}, pages = {84}, year = {2014}, abstract = {Durch die immer st{\"a}rker werdende Flut an digitalen Informationen basieren immer mehr Anwendungen auf der Nutzung von kosteng{\"u}nstigen Cloud Storage Diensten. Die Anzahl der Anbieter, die diese Dienste zur Verf{\"u}gung stellen, hat sich in den letzten Jahren deutlich erh{\"o}ht. Um den passenden Anbieter f{\"u}r eine Anwendung zu finden, m{\"u}ssen verschiedene Kriterien individuell ber{\"u}cksichtigt werden. In der vorliegenden Studie wird eine Auswahl an Anbietern etablierter Basic Storage Diensten vorgestellt und miteinander verglichen. F{\"u}r die Gegen{\"u}berstellung werden Kriterien extrahiert, welche bei jedem der untersuchten Anbieter anwendbar sind und somit eine m{\"o}glichst objektive Beurteilung erlauben. Hierzu geh{\"o}ren unter anderem Kosten, Recht, Sicherheit, Leistungsf{\"a}higkeit sowie bereitgestellte Schnittstellen. Die vorgestellten Kriterien k{\"o}nnen genutzt werden, um Cloud Storage Anbieter bez{\"u}glich eines konkreten Anwendungsfalles zu bewerten.}, language = {de} } @phdthesis{Tinnefeld2014, author = {Tinnefeld, Christian}, title = {Building a columnar database on shared main memory-based storage}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-72063}, school = {Universit{\"a}t Potsdam}, pages = {175}, year = {2014}, abstract = {In the field of disk-based parallel database management systems exists a great variety of solutions based on a shared-storage or a shared-nothing architecture. In contrast, main memory-based parallel database management systems are dominated solely by the shared-nothing approach as it preserves the in-memory performance advantage by processing data locally on each server. We argue that this unilateral development is going to cease due to the combination of the following three trends: a) Nowadays network technology features remote direct memory access (RDMA) and narrows the performance gap between accessing main memory inside a server and of a remote server to and even below a single order of magnitude. b) Modern storage systems scale gracefully, are elastic, and provide high-availability. c) A modern storage system such as Stanford's RAMCloud even keeps all data resident in main memory. Exploiting these characteristics in the context of a main-memory parallel database management system is desirable. The advent of RDMA-enabled network technology makes the creation of a parallel main memory DBMS based on a shared-storage approach feasible. This thesis describes building a columnar database on shared main memory-based storage. The thesis discusses the resulting architecture (Part I), the implications on query processing (Part II), and presents an evaluation of the resulting solution in terms of performance, high-availability, and elasticity (Part III). In our architecture, we use Stanford's RAMCloud as shared-storage, and the self-designed and developed in-memory AnalyticsDB as relational query processor on top. AnalyticsDB encapsulates data access and operator execution via an interface which allows seamless switching between local and remote main memory, while RAMCloud provides not only storage capacity, but also processing power. Combining both aspects allows pushing-down the execution of database operators into the storage system. We describe how the columnar data processed by AnalyticsDB is mapped to RAMCloud's key-value data model and how the performance advantages of columnar data storage can be preserved. The combination of fast network technology and the possibility to execute database operators in the storage system opens the discussion for site selection. We construct a system model that allows the estimation of operator execution costs in terms of network transfer, data processed in memory, and wall time. This can be used for database operators that work on one relation at a time - such as a scan or materialize operation - to discuss the site selection problem (data pull vs. operator push). Since a database query translates to the execution of several database operators, it is possible that the optimal site selection varies per operator. For the execution of a database operator that works on two (or more) relations at a time, such as a join, the system model is enriched by additional factors such as the chosen algorithm (e.g. Grace- vs. Distributed Block Nested Loop Join vs. Cyclo-Join), the data partitioning of the respective relations, and their overlapping as well as the allowed resource allocation. We present an evaluation on a cluster with 60 nodes where all nodes are connected via RDMA-enabled network equipment. We show that query processing performance is about 2.4x slower if everything is done via the data pull operator execution strategy (i.e. RAMCloud is being used only for data access) and about 27\% slower if operator execution is also supported inside RAMCloud (in comparison to operating only on main memory inside a server without any network communication at all). The fast-crash recovery feature of RAMCloud can be leveraged to provide high-availability, e.g. a server crash during query execution only delays the query response for about one second. Our solution is elastic in a way that it can adapt to changing workloads a) within seconds, b) without interruption of the ongoing query processing, and c) without manual intervention.}, language = {en} } @phdthesis{Steinert2014, author = {Steinert, Bastian}, title = {Built-in recovery support for explorative programming}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-71305}, school = {Universit{\"a}t Potsdam}, year = {2014}, abstract = {This work introduces concepts and corresponding tool support to enable a complementary approach in dealing with recovery. Programmers need to recover a development state, or a part thereof, when previously made changes reveal undesired implications. However, when the need arises suddenly and unexpectedly, recovery often involves expensive and tedious work. To avoid tedious work, literature recommends keeping away from unexpected recovery demands by following a structured and disciplined approach, which consists of the application of various best practices including working only on one thing at a time, performing small steps, as well as making proper use of versioning and testing tools. However, the attempt to avoid unexpected recovery is both time-consuming and error-prone. On the one hand, it requires disproportionate effort to minimize the risk of unexpected situations. On the other hand, applying recommended practices selectively, which saves time, can hardly avoid recovery. In addition, the constant need for foresight and self-control has unfavorable implications. It is exhaustive and impedes creative problem solving. This work proposes to make recovery fast and easy and introduces corresponding support called CoExist. Such dedicated support turns situations of unanticipated recovery from tedious experiences into pleasant ones. It makes recovery fast and easy to accomplish, even if explicit commits are unavailable or tests have been ignored for some time. When mistakes and unexpected insights are no longer associated with tedious corrective actions, programmers are encouraged to change source code as a means to reason about it, as opposed to making changes only after structuring and evaluating them mentally. This work further reports on an implementation of the proposed tool support in the Squeak/Smalltalk development environment. The development of the tools has been accompanied by regular performance and usability tests. In addition, this work investigates whether the proposed tools affect programmers' performance. In a controlled lab study, 22 participants improved the design of two different applications. Using a repeated measurement setup, the study examined the effect of providing CoExist on programming performance. The result of analyzing 88 hours of programming suggests that built-in recovery support as provided with CoExist positively has a positive effect on programming performance in explorative programming tasks.}, language = {en} } @misc{HoosLindauerSchaub2014, author = {Hoos, Holger and Lindauer, Marius and Schaub, Torsten H.}, title = {claspfolio 2}, series = {Postprints der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe}, journal = {Postprints der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe}, number = {606}, issn = {1866-8372}, doi = {10.25932/publishup-41612}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-416129}, pages = {17}, year = {2014}, abstract = {Building on the award-winning, portfolio-based ASP solver claspfolio, we present claspfolio 2, a modular and open solver architecture that integrates several different portfolio-based algorithm selection approaches and techniques. The claspfolio 2 solver framework supports various feature generators, solver selection approaches, solver portfolios, as well as solver-schedule-based pre-solving techniques. The default configuration of claspfolio 2 relies on a light-weight version of the ASP solver clasp to generate static and dynamic instance features. The flexible open design of claspfolio 2 is a distinguishing factor even beyond ASP. As such, it provides a unique framework for comparing and combining existing portfolio-based algorithm selection approaches and techniques in a single, unified framework. Taking advantage of this, we conducted an extensive experimental study to assess the impact of different feature sets, selection approaches and base solver portfolios. In addition to gaining substantial insights into the utility of the various approaches and techniques, we identified a default configuration of claspfolio 2 that achieves substantial performance gains not only over clasp's default configuration and the earlier version of claspfolio, but also over manually tuned configurations of clasp.}, language = {en} } @book{OPUS4-6813, title = {Cloud security mechanisms}, number = {87}, editor = {Neuhaus, Christian and Polze, Andreas}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-281-0}, issn = {1613-5652}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-68168}, publisher = {Universit{\"a}t Potsdam}, pages = {78}, year = {2014}, abstract = {Cloud computing has brought great benefits in cost and flexibility for provisioning services. The greatest challenge of cloud computing remains however the question of security. The current standard tools in access control mechanisms and cryptography can only partly solve the security challenges of cloud infrastructures. In the recent years of research in security and cryptography, novel mechanisms, protocols and algorithms have emerged that offer new ways to create secure services atop cloud infrastructures. This report provides introductions to a selection of security mechanisms that were part of the "Cloud Security Mechanisms" seminar in summer term 2013 at HPI.}, language = {en} } @phdthesis{Schnjakin2014, author = {Schnjakin, Maxim}, title = {Cloud-RAID}, pages = {137}, year = {2014}, language = {de} } @article{Lis2014, author = {Lis, Monika}, title = {Constructing a Phylogenetic Tree}, series = {Process Design for Natural Scientists: an agile model-driven approach}, journal = {Process Design for Natural Scientists: an agile model-driven approach}, number = {500}, editor = {Lambrecht, Anna-Lena and Margaria, Tiziana}, publisher = {Springer Verlag}, address = {Berlin}, isbn = {978-3-662-45005-5}, issn = {1865-0929}, pages = {101 -- 109}, year = {2014}, abstract = {In this project I constructed a workflow that takes a DNA sequence as input and provides a phylogenetic tree, consisting of the input sequence and other sequences which were found during a database search. In this phylogenetic tree the sequences are arranged depending on similarities. In bioinformatics, constructing phylogenetic trees is often used to explore the evolutionary relationships of genes or organisms and to understand the mechanisms of evolution itself.}, language = {en} } @article{Noack2014, author = {Noack, Franziska}, title = {CREADED: Colored-Relief application for digital elevation data}, series = {Process design for natural scientists: an agile model-driven approach}, journal = {Process design for natural scientists: an agile model-driven approach}, number = {500}, publisher = {Springer}, address = {Berlin}, isbn = {978-3-662-45005-5}, issn = {1865-0929}, pages = {186 -- 199}, year = {2014}, abstract = {In the geoinformatics field, remote sensing data is often used for analyzing the characteristics of the current investigation area. This includes DEMs, which are simple raster grids containing grey scales representing the respective elevation values. The project CREADED that is presented in this paper aims at making these monochrome raster images more significant and more intuitively interpretable. For this purpose, an executable interactive model for creating a colored and relief-shaded Digital Elevation Model (DEM) has been designed using the jABC framework. The process is based on standard jABC-SIBs and SIBs that provide specific GIS functions, which are available as Web services, command line tools and scripts.}, language = {en} } @article{Kind2014, author = {Kind, Josephine}, title = {Creation of topographic maps}, series = {Process design for natural scientists: an agile model-driven approach}, journal = {Process design for natural scientists: an agile model-driven approach}, number = {500}, publisher = {Springer}, address = {Berlin}, isbn = {978-3-662-45005-5}, pages = {229 -- 238}, year = {2014}, abstract = {Location analyses are among the most common tasks while working with spatial data and geographic information systems. Automating the most frequently used procedures is therefore an important aspect of improving their usability. In this context, this project aims to design and implement a workflow, providing some basic tools for a location analysis. For the implementation with jABC, the workflow was applied to the problem of finding a suitable location for placing an artificial reef. For this analysis three parameters (bathymetry, slope and grain size of the ground material) were taken into account, processed, and visualized with the The Generic Mapping Tools (GMT), which were integrated into the workflow as jETI-SIBs. The implemented workflow thereby showed that the approach to combine jABC with GMT resulted in an user-centric yet user-friendly tool with high-quality cartographic outputs.}, language = {en} } @phdthesis{Heise2014, author = {Heise, Arvid}, title = {Data cleansing and integration operators for a parallel data analytics platform}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-77100}, school = {Universit{\"a}t Potsdam}, pages = {ii, 179}, year = {2014}, abstract = {The data quality of real-world datasets need to be constantly monitored and maintained to allow organizations and individuals to reliably use their data. Especially, data integration projects suffer from poor initial data quality and as a consequence consume more effort and money. Commercial products and research prototypes for data cleansing and integration help users to improve the quality of individual and combined datasets. They can be divided into either standalone systems or database management system (DBMS) extensions. On the one hand, standalone systems do not interact well with DBMS and require time-consuming data imports and exports. On the other hand, DBMS extensions are often limited by the underlying system and do not cover the full set of data cleansing and integration tasks. We overcome both limitations by implementing a concise set of five data cleansing and integration operators on the parallel data analytics platform Stratosphere. We define the semantics of the operators, present their parallel implementation, and devise optimization techniques for individual operators and combinations thereof. Users specify declarative queries in our query language METEOR with our new operators to improve the data quality of individual datasets or integrate them to larger datasets. By integrating the data cleansing operators into the higher level language layer of Stratosphere, users can easily combine cleansing operators with operators from other domains, such as information extraction, to complex data flows. Through a generic description of the operators, the Stratosphere optimizer reorders operators even from different domains to find better query plans. As a case study, we reimplemented a part of the large Open Government Data integration project GovWILD with our new operators and show that our queries run significantly faster than the original GovWILD queries, which rely on relational operators. Evaluation reveals that our operators exhibit good scalability on up to 100 cores, so that even larger inputs can be efficiently processed by scaling out to more machines. Finally, our scripts are considerably shorter than the original GovWILD scripts, which results in better maintainability of the scripts.}, language = {en} } @article{Blaese2014, author = {Blaese, Leif}, title = {Data mining for unidentified protein squences}, series = {Process design for natural scientists: an agile model-driven approach}, journal = {Process design for natural scientists: an agile model-driven approach}, number = {500}, publisher = {Springer}, address = {Berlin}, isbn = {978-3-662-45005-5}, issn = {1865-0929}, pages = {73 -- 87}, year = {2014}, abstract = {Through the use of next generation sequencing (NGS) technology, a lot of newly sequenced organisms are now available. Annotating those genes is one of the most challenging tasks in sequence biology. Here, we present an automated workflow to find homologue proteins, annotate sequences according to function and create a three-dimensional model.}, language = {en} } @misc{HaegeleSchlagenhaufRappetal.2014, author = {H{\"a}gele, Claudia and Schlagenhauf, Florian and Rapp, Michael A. and Sterzer, Philipp and Beck, Anne and Bermpohl, Felix and Stoy, Meline and Str{\"o}hle, Andreas and Wittchen, Hans-Ulrich and Dolan, Raymond J. and Heinz, Andreas}, title = {Dimensional psychiatry}, series = {Postprints der Universit{\"a}t Potsdam : Humanwissenschaftliche Reihe}, journal = {Postprints der Universit{\"a}t Potsdam : Humanwissenschaftliche Reihe}, number = {653}, issn = {1866-8364}, doi = {10.25932/publishup-43106}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-431064}, pages = {331 -- 341}, year = {2014}, abstract = {A dimensional approach in psychiatry aims to identify core mechanisms of mental disorders across nosological boundaries. We compared anticipation of reward between major psychiatric disorders, and investigated whether reward anticipation is impaired in several mental disorders and whether there is a common psychopathological correlate (negative mood) of such an impairment. We used functional magnetic resonance imaging (fMRI) and a monetary incentive delay (MID) task to study the functional correlates of reward anticipation across major psychiatric disorders in 184 subjects, with the diagnoses of alcohol dependence (n = 26), schizophrenia (n = 44), major depressive disorder (MDD, n = 24), bipolar disorder (acute manic episode, n = 13), attention deficit/hyperactivity disorder (ADHD, n = 23), and healthy controls (n = 54). Subjects' individual Beck Depression Inventory-and State-Trait Anxiety Inventory-scores were correlated with clusters showing significant activation during reward anticipation. During reward anticipation, we observed significant group differences in ventral striatal (VS) activation: patients with schizophrenia, alcohol dependence, and major depression showed significantly less ventral striatal activation compared to healthy controls. Depressive symptoms correlated with dysfunction in reward anticipation regardless of diagnostic entity. There was no significant correlation between anxiety symptoms and VS functional activation. Our findings demonstrate a neurobiological dysfunction related to reward prediction that transcended disorder categories and was related to measures of depressed mood. The findings underline the potential of a dimensional approach in psychiatry and strengthen the hypothesis that neurobiological research in psychiatric disorders can be targeted at core mechanisms that are likely to be implicated in a range of clinical entities.}, language = {en} } @unpublished{GrapentinHeidlerKorschetal.2014, author = {Grapentin, Andreas and Heidler, Kirstin and Korsch, Dimitri and Kumar Sah, Rakesh and Kunzmann, Nicco and Henning, Johannes and Mattis, Toni and Rein, Patrick and Seckler, Eric and Groneberg, Bj{\"o}rn and Zimmermann, Florian}, title = {Embedded operating system projects}, number = {90}, editor = {Hentschel, Uwe and Richter, Daniel and Polze, Andreas}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-296-4}, issn = {1613-5652}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-69154}, pages = {xi, 87}, year = {2014}, abstract = {In today's life, embedded systems are ubiquitous. But they differ from traditional desktop systems in many aspects - these include predictable timing behavior (real-time), the management of scarce resources (memory, network), reliable communication protocols, energy management, special purpose user-interfaces (headless operation), system configuration, programming languages (to support software/hardware co-design), and modeling techniques. Within this technical report, authors present results from the lecture "Operating Systems for Embedded Computing" that has been offered by the "Operating Systems and Middleware" group at HPI in Winter term 2013/14. Focus of the lecture and accompanying projects was on principles of real-time computing. Students had the chance to gather practical experience with a number of different OSes and applications and present experiences with near-hardware programming. Projects address the entire spectrum, from bare-metal programming to harnessing a real-time OS to exercising the full software/hardware co-design cycle. Three outstanding projects are at the heart of this technical report. Project 1 focuses on the development of a bare-metal operating system for LEGO Mindstorms EV3. While still a toy, it comes with a powerful ARM processor, 64 MB of main memory, standard interfaces, such as Bluetooth and network protocol stacks. EV3 runs a version of 1 1 Introduction Linux. Sources are available from Lego's web site. However, many devices and their driver software are proprietary and not well documented. Developing a new, bare-metal OS for the EV3 requires an understanding of the EV3 boot process. Since no standard input/output devices are available, initial debugging steps are tedious. After managing these initial steps, the project was able to adapt device drivers for a few Lego devices to an extent that a demonstrator (the Segway application) could be successfully run on the new OS. Project 2 looks at the EV3 from a different angle. The EV3 is running a pretty decent version of Linux- in principle, the RT_PREEMPT patch can turn any Linux system into a real-time OS by modifying the behavior of a number of synchronization constructs at the heart of the OS. Priority inversion is a problem that is solved by protocols such as priority inheritance or priority ceiling. Real-time OSes implement at least one of the protocols. The central idea of the project was the comparison of non-real-time and real-time variants of Linux on the EV3 hardware. A task set that showed effects of priority inversion on standard EV3 Linux would operate flawlessly on the Linux version with the RT_PREEMPT-patch applied. If only patching Lego's version of Linux was that easy... Project 3 takes the notion of real-time computing more seriously. The application scenario was centered around our Carrera Digital 132 racetrack. Obtaining position information from the track, controlling individual cars, detecting and modifying the Carrera Digital protocol required design and implementation of custom controller hardware. What to implement in hardware, firmware, and what to implement in application software - this was the central question addressed by the project.}, language = {en} } @phdthesis{Takouna2014, author = {Takouna, Ibrahim}, title = {Energy-efficient and performance-aware virtual machine management for cloud data centers}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-72399}, school = {Universit{\"a}t Potsdam}, year = {2014}, abstract = {Virtualisierte Cloud Datenzentren stellen nach Bedarf Ressourcen zur Verf{\"u}gu-ng, erm{\"o}glichen agile Ressourcenbereitstellung und beherbergen heterogene Applikationen mit verschiedenen Anforderungen an Ressourcen. Solche Datenzentren verbrauchen enorme Mengen an Energie, was die Erh{\"o}hung der Betriebskosten, der W{\"a}rme innerhalb der Zentren und des Kohlendioxidausstoßes verursacht. Der Anstieg des Energieverbrauches kann durch ein ineffektives Ressourcenmanagement, das die ineffiziente Ressourcenausnutzung verursacht, entstehen. Die vorliegende Dissertation stellt detaillierte Modelle und neue Verfahren f{\"u}r virtualisiertes Ressourcenmanagement in Cloud Datenzentren vor. Die vorgestellten Verfahren ziehen das Service-Level-Agreement (SLA) und die Heterogenit{\"a}t der Auslastung bez{\"u}glich des Bedarfs an Speicherzugriffen und Kommunikationsmustern von Web- und HPC- (High Performance Computing) Applikationen in Betracht. Um die pr{\"a}sentierten Techniken zu evaluieren, verwenden wir Simulationen und echte Protokollierung der Auslastungen von Web- und HPC- Applikationen. Außerdem vergleichen wir unser Techniken und Verfahren mit anderen aktuellen Verfahren durch die Anwendung von verschiedenen Performance Metriken. Die Hauptbeitr{\"a}ge dieser Dissertation sind Folgendes: Ein Proaktives auf robuster Optimierung basierendes Ressourcenbereitstellungsverfahren. Dieses Verfahren erh{\"o}ht die F{\"a}higkeit der Hostes zur Verf{\"u}g-ungsstellung von mehr VMs. Gleichzeitig aber wird der unn{\"o}tige Energieverbrauch minimiert. Zus{\"a}tzlich mindert diese Technik unerw{\"u}nschte {\"A}nde-rungen im Energiezustand des Servers. Die vorgestellte Technik nutzt einen auf Intervall basierenden Vorhersagealgorithmus zur Implementierung einer robusten Optimierung. Dabei werden unsichere Anforderungen in Betracht gezogen. Ein adaptives und auf Intervall basierendes Verfahren zur Vorhersage des Arbeitsaufkommens mit hohen, in k{\"u}rzer Zeit auftretenden Schwankungen. Die Intervall basierende Vorhersage ist implementiert in der Standard Abweichung Variante und in der Median absoluter Abweichung Variante. Die Intervall-{\"A}nderungen basieren auf einem adaptiven Vertrauensfenster um die Schwankungen des Arbeitsaufkommens zu bew{\"a}ltigen. Eine robuste VM Zusammenlegung f{\"u}r ein effizientes Energie und Performance Management. Dies erm{\"o}glicht die gegenseitige Abh{\"a}ngigkeit zwischen der Energie und der Performance zu minimieren. Unser Verfahren reduziert die Anzahl der VM-Migrationen im Vergleich mit den neu vor kurzem vorgestellten Verfahren. Dies tr{\"a}gt auch zur Reduzierung des durch das Netzwerk verursachten Energieverbrauches. Außerdem reduziert dieses Verfahren SLA-Verletzungen und die Anzahl von {\"A}nderungen an Energiezus-t{\"a}nden. Ein generisches Modell f{\"u}r das Netzwerk eines Datenzentrums um die verz{\"o}-gerte Kommunikation und ihre Auswirkung auf die VM Performance und auf die Netzwerkenergie zu simulieren. Außerdem wird ein generisches Modell f{\"u}r ein Memory-Bus des Servers vorgestellt. Dieses Modell beinhaltet auch Modelle f{\"u}r die Latenzzeit und den Energieverbrauch f{\"u}r verschiedene Memory Frequenzen. Dies erlaubt eine Simulation der Memory Verz{\"o}gerung und ihre Auswirkung auf die VM-Performance und auf den Memory Energieverbrauch. Kommunikation bewusste und Energie effiziente Zusammenlegung f{\"u}r parallele Applikationen um die dynamische Entdeckung von Kommunikationsmustern und das Umplanen von VMs zu erm{\"o}glichen. Das Umplanen von VMs benutzt eine auf den entdeckten Kommunikationsmustern basierende Migration. Eine neue Technik zur Entdeckung von dynamischen Mustern ist implementiert. Sie basiert auf der Signal Verarbeitung des Netzwerks von VMs, anstatt die Informationen des virtuellen Umstellung der Hosts oder der Initiierung der VMs zu nutzen. Das Ergebnis zeigt, dass unsere Methode die durchschnittliche Anwendung des Netzwerks reduziert und aufgrund der Reduzierung der aktiven Umstellungen Energie gespart. Außerdem bietet sie eine bessere VM Performance im Vergleich zu der CPU-basierten Platzierung. Memory bewusste VM Zusammenlegung f{\"u}r unabh{\"a}ngige VMs. Sie nutzt die Vielfalt des VMs Memory Zuganges um die Anwendung vom Memory-Bus der Hosts zu balancieren. Die vorgestellte Technik, Memory-Bus Load Balancing (MLB), verteilt die VMs reaktiv neu im Bezug auf ihre Anwendung vom Memory-Bus. Sie nutzt die VM Migration um die Performance des gesamtem Systems zu verbessern. Außerdem sind die dynamische Spannung, die Frequenz Skalierung des Memory und die MLB Methode kombiniert um ein besseres Energiesparen zu leisten.}, language = {en} } @phdthesis{Hebig2014, author = {Hebig, Regina}, title = {Evolution of model-driven engineering settings in practice}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-70761}, school = {Universit{\"a}t Potsdam}, year = {2014}, abstract = {Nowadays, software systems are getting more and more complex. To tackle this challenge most diverse techniques, such as design patterns, service oriented architectures (SOA), software development processes, and model-driven engineering (MDE), are used to improve productivity, while time to market and quality of the products stay stable. Multiple of these techniques are used in parallel to profit from their benefits. While the use of sophisticated software development processes is standard, today, MDE is just adopted in practice. However, research has shown that the application of MDE is not always successful. It is not fully understood when advantages of MDE can be used and to what degree MDE can also be disadvantageous for productivity. Further, when combining different techniques that aim to affect the same factor (e.g. productivity) the question arises whether these techniques really complement each other or, in contrast, compensate their effects. Due to that, there is the concrete question how MDE and other techniques, such as software development process, are interrelated. Both aspects (advantages and disadvantages for productivity as well as the interrelation to other techniques) need to be understood to identify risks relating to the productivity impact of MDE. Before studying MDE's impact on productivity, it is necessary to investigate the range of validity that can be reached for the results. This includes two questions. First, there is the question whether MDE's impact on productivity is similar for all approaches of adopting MDE in practice. Second, there is the question whether MDE's impact on productivity for an approach of using MDE in practice remains stable over time. The answers for both questions are crucial for handling risks of MDE, but also for the design of future studies on MDE success. This thesis addresses these questions with the goal to support adoption of MDE in future. To enable a differentiated discussion about MDE, the term MDE setting'' is introduced. MDE setting refers to the applied technical setting, i.e. the employed manual and automated activities, artifacts, languages, and tools. An MDE setting's possible impact on productivity is studied with a focus on changeability and the interrelation to software development processes. This is done by introducing a taxonomy of changeability concerns that might be affected by an MDE setting. Further, three MDE traits are identified and it is studied for which manifestations of these MDE traits software development processes are impacted. To enable the assessment and evaluation of an MDE setting's impacts, the Software Manufacture Model language is introduced. This is a process modeling language that allows to reason about how relations between (modeling) artifacts (e.g. models or code files) change during application of manual or automated development activities. On that basis, risk analysis techniques are provided. These techniques allow identifying changeability risks and assessing the manifestations of the MDE traits (and with it an MDE setting's impact on software development processes). To address the range of validity, MDE settings from practice and their evolution histories were capture in context of this thesis. First, this data is used to show that MDE settings cover the whole spectrum concerning their impact on changeability or interrelation to software development processes. Neither it is seldom that MDE settings are neutral for processes nor is it seldom that MDE settings have impact on processes. Similarly, the impact on changeability differs relevantly. Second, a taxonomy of evolution of MDE settings is introduced. In that context it is discussed to what extent different types of changes on an MDE setting can influence this MDE setting's impact on changeability and the interrelation to processes. The category of structural evolution, which can change these characteristics of an MDE setting, is identified. The captured MDE settings from practice are used to show that structural evolution exists and is common. In addition, some examples of structural evolution steps are collected that actually led to a change in the characteristics of the respective MDE settings. Two implications are: First, the assessed diversity of MDE settings evaluates the need for the analysis techniques that shall be presented in this thesis. Second, evolution is one explanation for the diversity of MDE settings in practice. To summarize, this thesis studies the nature and evolution of MDE settings in practice. As a result support for the adoption of MDE settings is provided in form of techniques for the identification of risks relating to productivity impacts.}, language = {en} } @article{Vierheller2014, author = {Vierheller, Janine}, title = {Exploratory Data Analysis}, series = {Process Design for Natural Scientists: an agile model-driven approach}, journal = {Process Design for Natural Scientists: an agile model-driven approach}, number = {500}, editor = {Lambrecht, Anna-Lena and Margaria, Tiziana}, publisher = {Axel Springer Verlag}, address = {Berlin}, isbn = {978-3-662-45005-5}, issn = {1865-0929}, pages = {110 -- 126}, year = {2014}, abstract = {In bioinformatics the term exploratory data analysis refers to different methods to get an overview of large biological data sets. Hence, it helps to create a framework for further analysis and hypothesis testing. The workflow facilitates this first important step of the data analysis created by high-throughput technologies. The results are different plots showing the structure of the measurements. The goal of the workflow is the automatization of the exploratory data analysis, but also the flexibility should be guaranteed. The basic tool is the free software R.}, language = {en} } @article{Teske2014, author = {Teske, Daniel}, title = {Geocoder accuracy ranking}, series = {Process design for natural scientists: an agile model-driven approach}, journal = {Process design for natural scientists: an agile model-driven approach}, number = {500}, publisher = {Springer}, address = {Berlin}, isbn = {978-3-662-45005-5}, issn = {1865-0929}, pages = {161 -- 174}, year = {2014}, abstract = {Finding an address on a map is sometimes tricky: the chosen map application may be unfamiliar with the enclosed region. There are several geocoders on the market, they have different databases and algorithms to compute the query. Consequently, the geocoding results differ in their quality. Fortunately the geocoders provide a rich set of metadata. The workflow described in this paper compares this metadata with the aim to find out which geocoder is offering the best-fitting coordinate for a given address.}, language = {en} }