@phdthesis{Yang2019, author = {Yang, Haojin}, title = {Deep representation learning for multimedia data analysis}, school = {Universit{\"a}t Potsdam}, pages = {278}, year = {2019}, language = {en} } @phdthesis{Ion2018, author = {Ion, Alexandra}, title = {Metamaterial devices}, doi = {10.25932/publishup-42986}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-429861}, school = {Universit{\"a}t Potsdam}, pages = {x, 173}, year = {2018}, abstract = {Digital fabrication machines such as 3D printers excel at producing arbitrary shapes, such as for decorative objects. In recent years, researchers started to engineer not only the outer shape of objects, but also their internal microstructure. Such objects, typically based on 3D cell grids, are known as metamaterials. Metamaterials have been used to create materials that, e.g., change their volume, or have variable compliance. While metamaterials were initially understood as materials, we propose to think of them as devices. We argue that thinking of metamaterials as devices enables us to create internal structures that offer functionalities to implement an input-process-output model without electronics, but purely within the material's internal structure. In this thesis, we investigate three aspects of such metamaterial devices that implement parts of the input-process-output model: (1) materials that process analog inputs by implementing mechanisms based on their microstructure, (2) that process digital signals by embedding mechanical computation into the object's microstructure, and (3) interactive metamaterial objects that output to the user by changing their outside to interact with their environment. The input to our metamaterial devices is provided directly by the users interacting with the device by means of physically pushing the metamaterial, e.g., turning a handle, pushing a button, etc. The design of such intricate microstructures, which enable the functionality of metamaterial devices, is not obvious. The complexity of the design arises from the fact that not only a suitable cell geometry is necessary, but that additionally cells need to play together in a well-defined way. To support users in creating such microstructures, we research and implement interactive design tools. These tools allow experts to freely edit their materials, while supporting novice users by auto-generating cells assemblies from high-level input. Our tools implement easy-to-use interactions like brushing, interactively simulate the cell structures' deformation directly in the editor, and export the geometry as a 3D-printable file. Our goal is to foster more research and innovation on metamaterial devices by allowing the broader public to contribute.}, language = {en} } @phdthesis{Klimke2018, author = {Klimke, Jan}, title = {Web-based provisioning and application of large-scale virtual 3D city models}, doi = {10.25932/publishup-42805}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-428053}, school = {Universit{\"a}t Potsdam}, pages = {xiii, 141}, year = {2018}, abstract = {Virtual 3D city models represent and integrate a variety of spatial data and georeferenced data related to urban areas. With the help of improved remote-sensing technology, official 3D cadastral data, open data or geodata crowdsourcing, the quantity and availability of such data are constantly expanding and its quality is ever improving for many major cities and metropolitan regions. There are numerous fields of applications for such data, including city planning and development, environmental analysis and simulation, disaster and risk management, navigation systems, and interactive city maps. The dissemination and the interactive use of virtual 3D city models represent key technical functionality required by nearly all corresponding systems, services, and applications. The size and complexity of virtual 3D city models, their management, their handling, and especially their visualization represent challenging tasks. For example, mobile applications can hardly handle these models due to their massive data volume and data heterogeneity. Therefore, the efficient usage of all computational resources (e.g., storage, processing power, main memory, and graphics hardware, etc.) is a key requirement for software engineering in this field. Common approaches are based on complex clients that require the 3D model data (e.g., 3D meshes and 2D textures) to be transferred to them and that then render those received 3D models. However, these applications have to implement most stages of the visualization pipeline on client side. Thus, as high-quality 3D rendering processes strongly depend on locally available computer graphics resources, software engineering faces the challenge of building robust cross-platform client implementations. Web-based provisioning aims at providing a service-oriented software architecture that consists of tailored functional components for building web-based and mobile applications that manage and visualize virtual 3D city models. This thesis presents corresponding concepts and techniques for web-based provisioning of virtual 3D city models. In particular, it introduces services that allow us to efficiently build applications for virtual 3D city models based on a fine-grained service concept. The thesis covers five main areas: 1. A Service-Based Concept for Image-Based Provisioning of Virtual 3D City Models It creates a frame for a broad range of services related to the rendering and image-based dissemination of virtual 3D city models. 2. 3D Rendering Service for Virtual 3D City Models This service provides efficient, high-quality 3D rendering functionality for virtual 3D city models. In particular, it copes with requirements such as standardized data formats, massive model texturing, detailed 3D geometry, access to associated feature data, and non-assumed frame-to-frame coherence for parallel service requests. In addition, it supports thematic and artistic styling based on an expandable graphics effects library. 3. Layered Map Service for Virtual 3D City Models It generates a map-like representation of virtual 3D city models using an oblique view. It provides high visual quality, fast initial loading times, simple map-based interaction and feature data access. Based on a configurable client framework, mobile and web-based applications for virtual 3D city models can be created easily. 4. Video Service for Virtual 3D City Models It creates and synthesizes videos from virtual 3D city models. Without requiring client-side 3D rendering capabilities, users can create camera paths by a map-based user interface, configure scene contents, styling, image overlays, text overlays, and their transitions. The service significantly reduces the manual effort typically required to produce such videos. The videos can automatically be updated when the underlying data changes. 5. Service-Based Camera Interaction It supports task-based 3D camera interactions, which can be integrated seamlessly into service-based visualization applications. It is demonstrated how to build such web-based interactive applications for virtual 3D city models using this camera service. These contributions provide a framework for design, implementation, and deployment of future web-based applications, systems, and services for virtual 3D city models. The approach shows how to decompose the complex, monolithic functionality of current 3D geovisualization systems into independently designed, implemented, and operated service- oriented units. In that sense, this thesis also contributes to microservice architectures for 3D geovisualization systems—a key challenge of today's IT systems engineering to build scalable IT solutions.}, language = {en} } @phdthesis{Gawron2019, author = {Gawron, Marian}, title = {Towards automated advanced vulnerability analysis}, doi = {10.25932/publishup-42635}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-426352}, school = {Universit{\"a}t Potsdam}, pages = {149}, year = {2019}, abstract = {The identification of vulnerabilities in IT infrastructures is a crucial problem in enhancing the security, because many incidents resulted from already known vulnerabilities, which could have been resolved. Thus, the initial identification of vulnerabilities has to be used to directly resolve the related weaknesses and mitigate attack possibilities. The nature of vulnerability information requires a collection and normalization of the information prior to any utilization, because the information is widely distributed in different sources with their unique formats. Therefore, the comprehensive vulnerability model was defined and different sources have been integrated into one database. Furthermore, different analytic approaches have been designed and implemented into the HPI-VDB, which directly benefit from the comprehensive vulnerability model and especially from the logical preconditions and postconditions. Firstly, different approaches to detect vulnerabilities in both IT systems of average users and corporate networks of large companies are presented. Therefore, the approaches mainly focus on the identification of all installed applications, since it is a fundamental step in the detection. This detection is realized differently depending on the target use-case. Thus, the experience of the user, as well as the layout and possibilities of the target infrastructure are considered. Furthermore, a passive lightweight detection approach was invented that utilizes existing information on corporate networks to identify applications. In addition, two different approaches to represent the results using attack graphs are illustrated in the comparison between traditional attack graphs and a simplistic graph version, which was integrated into the database as well. The implementation of those use-cases for vulnerability information especially considers the usability. Beside the analytic approaches, the high data quality of the vulnerability information had to be achieved and guaranteed. The different problems of receiving incomplete or unreliable information for the vulnerabilities are addressed with different correction mechanisms. The corrections can be carried out with correlation or lookup mechanisms in reliable sources or identifier dictionaries. Furthermore, a machine learning based verification procedure was presented that allows an automatic derivation of important characteristics from the textual description of the vulnerabilities.}, language = {en} } @phdthesis{Sapegin2018, author = {Sapegin, Andrey}, title = {High-Speed Security Log Analytics Using Hybrid Outlier Detection}, doi = {10.25932/publishup-42611}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-426118}, school = {Universit{\"a}t Potsdam}, pages = {162}, year = {2018}, abstract = {The rapid development and integration of Information Technologies over the last decades influenced all areas of our life, including the business world. Yet not only the modern enterprises become digitalised, but also security and criminal threats move into the digital sphere. To withstand these threats, modern companies must be aware of all activities within their computer networks. The keystone for such continuous security monitoring is a Security Information and Event Management (SIEM) system that collects and processes all security-related log messages from the entire enterprise network. However, digital transformations and technologies, such as network virtualisation and widespread usage of mobile communications, lead to a constantly increasing number of monitored devices and systems. As a result, the amount of data that has to be processed by a SIEM system is increasing rapidly. Besides that, in-depth security analysis of the captured data requires the application of rather sophisticated outlier detection algorithms that have a high computational complexity. Existing outlier detection methods often suffer from performance issues and are not directly applicable for high-speed and high-volume analysis of heterogeneous security-related events, which becomes a major challenge for modern SIEM systems nowadays. This thesis provides a number of solutions for the mentioned challenges. First, it proposes a new SIEM system architecture for high-speed processing of security events, implementing parallel, in-memory and in-database processing principles. The proposed architecture also utilises the most efficient log format for high-speed data normalisation. Next, the thesis offers several novel high-speed outlier detection methods, including generic Hybrid Outlier Detection that can efficiently be used for Big Data analysis. Finally, the special User Behaviour Outlier Detection is proposed for better threat detection and analysis of particular user behaviour cases. The proposed architecture and methods were evaluated in terms of both performance and accuracy, as well as compared with classical architecture and existing algorithms. These evaluations were performed on multiple data sets, including simulated data, well-known public intrusion detection data set, and real data from the large multinational enterprise. The evaluation results have proved the high performance and efficacy of the developed methods. All concepts proposed in this thesis were integrated into the prototype of the SIEM system, capable of high-speed analysis of Big Security Data, which makes this integrated SIEM platform highly relevant for modern enterprise security applications.}, language = {en} } @book{MeinelRenzLuderichetal.2019, author = {Meinel, Christoph and Renz, Jan and Luderich, Matthias and Malyska, Vivien and Kaiser, Konstantin and Oberl{\"a}nder, Arne}, title = {Die HPI Schul-Cloud}, number = {125}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-453-1}, issn = {1613-5652}, doi = {10.25932/publishup-42306}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-423062}, publisher = {Universit{\"a}t Potsdam}, pages = {57}, year = {2019}, abstract = {Die digitale Transformation durchdringt alle gesellschaftlichen Ebenen und Felder, nicht zuletzt auch das Bildungssystem. Dieses ist auf die Ver{\"a}nderungen kaum vorbereitet und begegnet ihnen vor allem auf Basis des Eigenengagements seiner Lehrer*innen. Strukturelle Reaktionen auf den Mangel an qualitativ hochwertigen Fortbildungen, auf schlecht ausgestattete Unterrichtsr{\"a}ume und nicht professionell gewartete Computersysteme gibt es erst seit kurzem. Doch auch wenn Beharrungskr{\"a}fte unter P{\"a}dagog*innen verbreitet sind, erfordert die Transformation des Systems Schule auch eine neue Mentalit{\"a}t und neue Arbeits- und Kooperationsformen. Zeitgem{\"a}ßer Unterricht ben{\"o}tigt moderne Technologie und zeitgem{\"a}ße IT-Architekturen. Nur Systeme, die f{\"u}r Lehrer*innen und Sch{\"u}ler*innen problemlos verf{\"u}gbar, benutzerfreundlich zu bedienen und didaktisch flexibel einsetzbar sind, finden in Schulen Akzeptanz. Hierf{\"u}r haben wir die HPI Schul-Cloud entwickelt. Sie erm{\"o}glicht den einfachen Zugang zu neuesten, professionell gewarteten Anwendungen, verschiedensten digitalen Medien, die Vernetzung verschiedener Lernorte und den rechtssicheren Einsatz von Kommunikations- und Kollaborationstools. Die Entwicklung der HPI Schul-Cloud ist umso notwendiger, als dass rechtliche Anforderungen - insbesondere aus der Datenschutzgrundverordnung der EU herr{\"u}hrend - den Einsatz von Cloud-Anwendungen, die in der Arbeitswelt verbreitet sind, in Schulen unm{\"o}glich machen. Im Bildungsbereich verbreitete Anwendungen sind gr{\"o}ßtenteils technisch veraltet und nicht benutzerfreundlich. Dies n{\"o}tigt die Bundesl{\"a}nder zu kostspieligen Eigenentwicklungen mit Aufw{\"a}nden im zweistelligen Millionenbereich - Projekte die teilweise gescheitert sind. Dank der modularen Micro-Service-Architektur k{\"o}nnen die Bundesl{\"a}nder zuk{\"u}nftig auf die HPI Schul-Cloud als technische Grundlage f{\"u}r ihre Eigen- oder Gemeinschaftsprojekte zur{\"u}ckgreifen. Hierf{\"u}r gilt es, eine nachhaltige Struktur f{\"u}r die Weiterentwicklung der Open-Source-Software HPI Schul-Cloud zu schaffen. Dieser Bericht beschreibt den Entwicklungsstand und die weiteren Perspektiven des Projekts HPI Schul-Cloud im Januar 2019. 96 Schulen deutschlandweit nutzen die HPI Schul-Cloud, bereitgestellt durch das Hasso-Plattner-Institut. Weitere 45 Schulen und Studienseminare nutzen die Nieders{\"a}chsische Bildungscloud, die technisch auf der HPI Schul-Cloud basiert. Das vom Bundesministerium f{\"u}r Bildung und Forschung gef{\"o}rderte Projekt l{\"a}uft in der gegenw{\"a}rtigen Roll-Out-Phase bis zum 31. Juli 2021. Gemeinsam mit unserem Kooperationspartner MINT-EC streben wir an, die HPI Schul-Cloud m{\"o}glichst an allen Schulen des Netzwerks einzusetzen.}, language = {de} } @phdthesis{Richter2018, author = {Richter, Rico}, title = {Concepts and techniques for processing and rendering of massive 3D point clouds}, doi = {10.25932/publishup-42330}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-423304}, school = {Universit{\"a}t Potsdam}, pages = {v, 131}, year = {2018}, abstract = {Remote sensing technology, such as airborne, mobile, or terrestrial laser scanning, and photogrammetric techniques, are fundamental approaches for efficient, automatic creation of digital representations of spatial environments. For example, they allow us to generate 3D point clouds of landscapes, cities, infrastructure networks, and sites. As essential and universal category of geodata, 3D point clouds are used and processed by a growing number of applications, services, and systems such as in the domains of urban planning, landscape architecture, environmental monitoring, disaster management, virtual geographic environments as well as for spatial analysis and simulation. While the acquisition processes for 3D point clouds become more and more reliable and widely-used, applications and systems are faced with more and more 3D point cloud data. In addition, 3D point clouds, by their very nature, are raw data, i.e., they do not contain any structural or semantics information. Many processing strategies common to GIS such as deriving polygon-based 3D models generally do not scale for billions of points. GIS typically reduce data density and precision of 3D point clouds to cope with the sheer amount of data, but that results in a significant loss of valuable information at the same time. This thesis proposes concepts and techniques designed to efficiently store and process massive 3D point clouds. To this end, object-class segmentation approaches are presented to attribute semantics to 3D point clouds, used, for example, to identify building, vegetation, and ground structures and, thus, to enable processing, analyzing, and visualizing 3D point clouds in a more effective and efficient way. Similarly, change detection and updating strategies for 3D point clouds are introduced that allow for reducing storage requirements and incrementally updating 3D point cloud databases. In addition, this thesis presents out-of-core, real-time rendering techniques used to interactively explore 3D point clouds and related analysis results. All techniques have been implemented based on specialized spatial data structures, out-of-core algorithms, and GPU-based processing schemas to cope with massive 3D point clouds having billions of points. All proposed techniques have been evaluated and demonstrated their applicability to the field of geospatial applications and systems, in particular for tasks such as classification, processing, and visualization. Case studies for 3D point clouds of entire cities with up to 80 billion points show that the presented approaches open up new ways to manage and apply large-scale, dense, and time-variant 3D point clouds as required by a rapidly growing number of applications and systems.}, language = {en} } @phdthesis{Lopes2018, author = {Lopes, Pedro}, title = {Interactive Systems Based on Electrical Muscle Stimulation}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-421165}, school = {Universit{\"a}t Potsdam}, pages = {171}, year = {2018}, abstract = {How can interactive devices connect with users in the most immediate and intimate way? This question has driven interactive computing for decades. Throughout the last decades, we witnessed how mobile devices moved computing into users' pockets, and recently, wearables put computing in constant physical contact with the user's skin. In both cases moving the devices closer to users allowed devices to sense more of the user, and thus act more personal. The main question that drives our research is: what is the next logical step? Some researchers argue that the next generation of interactive devices will move past the user's skin and be directly implanted inside the user's body. This has already happened in that we have pacemakers, insulin pumps, etc. However, we argue that what we see is not devices moving towards the inside of the user's body, but rather towards the body's biological "interface" they need to address in order to perform their function. To implement our vision, we created a set of devices that intentionally borrow parts of the user's body for input and output, rather than adding more technology to the body. In this dissertation we present one specific flavor of such devices, i.e., devices that borrow the user's muscles. We engineered I/O devices that interact with the user by reading and controlling muscle activity. To achieve the latter, our devices are based on medical-grade signal generators and electrodes attached to the user's skin that send electrical impulses to the user's muscles; these impulses then cause the user's muscles to contract. While electrical muscle stimulation (EMS) devices have been used to regenerate lost motor functions in rehabilitation medicine since the 1960s, in this dissertation, we propose a new perspective: EMS as a means for creating interactive systems. We start by presenting seven prototypes of interactive devices that we have created to illustrate several benefits of EMS. These devices form two main categories: (1) Devices that allow users eyes-free access to information by means of their proprioceptive sense, such as the value of a variable in a computer system, a tool, or a plot; (2) Devices that increase immersion in virtual reality by simulating large forces, such as wind, physical impact, or walls and heavy objects. Then, we analyze the potential of EMS to build interactive systems that miniaturize well and discuss how they leverage our proprioceptive sense as an I/O modality. We proceed by laying out the benefits and disadvantages of both EMS and mechanical haptic devices, such as exoskeletons. We conclude by sketching an outline for future research on EMS by listing open technical, ethical and philosophical questions that we left unanswered.}, language = {en} } @phdthesis{Herzberg2018, author = {Herzberg, Nico}, title = {Integrating events into non-automated business process environments}, school = {Universit{\"a}t Potsdam}, pages = {243}, year = {2018}, language = {en} } @phdthesis{Cheng2018, author = {Cheng, Lung-Pan}, title = {Human actuation}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-418371}, school = {Universit{\"a}t Potsdam}, pages = {xiv, 85}, year = {2018}, abstract = {Ever since the conception of the virtual reality headset in 1968, many researchers have argued that the next step in virtual reality is to allow users to not only see and hear, but also feel virtual worlds. One approach is to use mechanical equipment to provide haptic feedback, e.g., robotic arms, exoskeletons and motion platforms. However, the size and the weight of such mechanical equipment tends to be proportional to its target's size and weight, i.e., providing human-scale haptic feedback requires human-scale equipment, often restricting them to arcades and lab environments. The key idea behind this dissertation is to bypass mechanical equipment by instead leveraging human muscle power. We thus create software systems that orchestrate humans in doing such mechanical labor—this is what we call human actuation. A potential benefit of such systems is that humans are more generic, flexible, and versatile than machines. This brings a wide range of haptic feedback to modern virtual reality systems. We start with a proof-of-concept system—Haptic Turk, focusing on delivering motion experiences just like a motion platform. All Haptic Turk setups consist of a user who is supported by one or more human actuators. The user enjoys an interactive motion simulation such as a hang glider experience, but the motion is generated by those human actuators who manually lift, tilt, and push the user's limbs or torso. To get the timing and force right, timed motion instructions in a format familiar from rhythm games are generated by the system. Next, we extend the concept of human actuation from 3-DoF to 6-DoF virtual reality where users have the freedom to walk around. TurkDeck tackles this problem by orchestrating a group of human actuators to reconfigure a set of passive props on the fly while the user is progressing in the virtual environment. TurkDeck schedules human actuators by their distances from the user, and instructs them to reconfigure the props to the right place on the right time using laser projection and voice output. Our studies in Haptic Turk and TurkDeck showed that human actuators enjoyed the experience but not as much as users. To eliminate the need of dedicated human actuators, Mutual Turk makes everyone a user by exchanging mechanical actuation between two or more users. Mutual Turk's main functionality is that it orchestrates the users so as to actuate props at just the right moment and with just the right force to produce the correct feedback in each other's experience. Finally, we further eliminate the need of another user, making human actuation applicable to single-user experiences. iTurk makes the user constantly reconfigure and animate otherwise passive props. This allows iTurk to provide virtual worlds with constantly varying or even animated haptic effects, even though the only animate entity present in the system is the user. Our demo experience features one example each of iTurk's two main types of props, i.e., reconfigurable props (the foldable board from TurkDeck) and animated props (the pendulum). We conclude this dissertation by summarizing the findings of our explorations and pointing out future directions. We discuss the development of human actuation compare to traditional machine actuation, the possibility of combining human and machine actuators and interaction models that involve more human actuators.}, language = {en} } @book{MeinelGayvoronskayaSchnjakin2018, author = {Meinel, Christoph and Gayvoronskaya, Tatiana and Schnjakin, Maxim}, title = {Blockchain}, number = {124}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-441-8}, issn = {1613-5652}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-414525}, publisher = {Universit{\"a}t Potsdam}, pages = {102}, year = {2018}, abstract = {The term blockchain has recently become a buzzword, but only few know what exactly lies behind this approach. According to a survey, issued in the first quarter of 2017, the term is only known by 35 percent of German medium-sized enterprise representatives. However, the blockchain technology is very interesting for the mass media because of its rapid development and global capturing of different markets. For example, many see blockchain technology either as an all-purpose weapon— which only a few have access to—or as a hacker technology for secret deals in the darknet. The innovation of blockchain technology is found in its successful combination of already existing approaches: such as decentralized networks, cryptography, and consensus models. This innovative concept makes it possible to exchange values in a decentralized system. At the same time, there is no requirement for trust between its nodes (e.g. users). With this study the Hasso Plattner Institute would like to help readers form their own opinion about blockchain technology, and to distinguish between truly innovative properties and hype. The authors of the present study analyze the positive and negative properties of the blockchain architecture and suggest possible solutions, which can contribute to the efficient use of the technology. We recommend that every company define a clear target for the intended application, which is achievable with a reasonable cost-benefit ration, before deciding on this technology. Both the possibilities and the limitations of blockchain technology need to be considered. The relevant steps that must be taken in this respect are summarized /summed up for the reader in this study. Furthermore, this study elaborates on urgent problems such as the scalability of the blockchain, appropriate consensus algorithm and security, including various types of possible attacks and their countermeasures. New blockchains, for example, run the risk of reducing security, as changes to existing technology can lead to lacks in the security and failures. After discussing the innovative properties and problems of the blockchain technology, its implementation is discussed. There are a lot of implementation opportunities for companies available who are interested in the blockchain realization. The numerous applications have either their own blockchain as a basis or use existing and widespread blockchain systems. Various consortia and projects offer "blockchain-as-a-service{\"a}nd help other companies to develop, test and deploy their own applications. This study gives a detailed overview of diverse relevant applications and projects in the field of blockchain technology. As this technology is still a relatively young and fast developing approach, it still lacks uniform standards to allow the cooperation of different systems and to which all developers can adhere. Currently, developers are orienting themselves to Bitcoin, Ethereum and Hyperledger systems, which serve as the basis for many other blockchain applications. The goal is to give readers a clear and comprehensive overview of blockchain technology and its capabilities.}, language = {en} } @book{GieseMaximovaSakizloglouetal.2018, author = {Giese, Holger and Maximova, Maria and Sakizloglou, Lucas and Schneider, Sven}, title = {Metric temporal graph logic over typed attributed graphs}, number = {123}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-433-3}, issn = {1613-5652}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-411351}, publisher = {Universit{\"a}t Potsdam}, pages = {29}, year = {2018}, abstract = {Various kinds of typed attributed graphs are used to represent states of systems from a broad range of domains. For dynamic systems, established formalisms such as graph transformations provide a formal model for defining state sequences. We consider the extended case where time elapses between states and introduce a logic to reason about these sequences. With this logic we express properties on the structure and attributes of states as well as on the temporal occurrence of states that are related by their inner structure, which no formal logic over graphs accomplishes concisely so far. Firstly, we introduce graphs with history by equipping every graph element with the timestamp of its creation and, if applicable, its deletion. Secondly, we define a logic on graphs by integrating the temporal operator until into the well-established logic of nested graph conditions. Thirdly, we prove that our logic is equally expressive to nested graph conditions by providing a suitable reduction. Finally, the implementation of this reduction allows for the tool-based analysis of metric temporal properties for state sequences.}, language = {en} } @book{vanderWaltOdunAyoBastianetal.2018, author = {van der Walt, Estee and Odun-Ayo, Isaac and Bastian, Matthias and Eldin Elsaid, Mohamed Esam}, title = {Proceedings of the Fifth HPI Cloud Symposium "Operating the Cloud" 2017}, number = {122}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-432-6}, issn = {1613-5652}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-411330}, publisher = {Universit{\"a}t Potsdam}, pages = {70}, year = {2018}, abstract = {Every year, the Hasso Plattner Institute (HPI) invites guests from industry and academia to a collaborative scientific workshop on the topic Operating the Cloud. Our goal is to provide a forum for the exchange of knowledge and experience between industry and academia. Co-located with the event is the HPI's Future SOC Lab day, which offers an additional attractive and conducive environment for scientific and industry related discussions. Operating the Cloud aims to be a platform for productive interactions of innovative ideas, visions, and upcoming technologies in the field of cloud operation and administration. In these proceedings, the results of the fifth HPI cloud symposium Operating the Cloud 2017 are published. We thank the authors for exciting presentations and insights into their current work and research. Moreover, we look forward to more interesting submissions for the upcoming symposium in 2018.}, language = {en} } @phdthesis{Hildebrandt2017, author = {Hildebrandt, Dieter}, title = {Service-oriented 3D geovisualization systems}, school = {Universit{\"a}t Potsdam}, pages = {xii, 268}, year = {2017}, abstract = {3D geovisualization systems (3DGeoVSs) that use 3D geovirtual environments as a conceptual and technical framework are increasingly used for various applications. They facilitate obtaining insights from ubiquitous geodata by exploiting human abilities that other methods cannot provide. 3DGeoVSs are often complex and evolving systems required to be adaptable and to leverage distributed resources. Designing a 3DGeoVS based on service-oriented architectures, standards, and image-based representations (SSI) facilitates resource sharing and the agile and efficient construction and change of interoperable systems. In particular, exploiting image-based representations (IReps) of 3D views on geodata supports taking full advantage of the potential of such system designs by providing an efficient, decoupled, interoperable, and increasingly applied representation. However, there is insufficient knowledge on how to build service-oriented, standards-based 3DGeoVSs that exploit IReps. This insufficiency is substantially due to technology and interoperability gaps between the geovisualization domain and further domains that such systems rely on. This work presents a coherent framework of contributions that support designing the software architectures of targeted systems and exploiting IReps for providing, styling, and interacting with geodata. The contributions uniquely integrate existing concepts from multiple domains and novel contributions for identified limitations. The proposed software reference architecture (SRA) for 3DGeoVSs based on SSI facilitates designing concrete software architectures of such systems. The SRA describes the decomposition of 3DGeoVSs into a network of services and integrates the following contributions to facilitate exploiting IReps effectively and efficiently. The proposed generalized visualization pipeline model generalizes the prevalent visualization pipeline model and overcomes its expressiveness limitations with respect to transforming IReps. The proposed approach for image-based provisioning enables generating and supplying service consumers with image-based views (IViews). IViews act as first-class data entities in the communication between services and provide a suitable IRep and encoding of geodata. The proposed approach for image-based styling separates concerns of styling from image generation and enables styling geodata uniformly represented as IViews specified as algebraic compositions of high-level styling operators. The proposed approach for interactive image-based novel view generation enables generating new IViews from existing IViews in response to interactive manipulations of the viewing camera and includes an architectural pattern that generalizes common novel view generation. The proposed interactive assisting, constrained 3D navigation technique demonstrates how a navigation technique can be built that supports users in navigating multiscale virtual 3D city models, operates in 3DGeoVSs based on SSI as an application of the SRA, can exploit IReps, and can support collaborating services in exploiting IReps. The validity of the contributions is supported by proof-of-concept prototype implementations and applications and effectiveness and efficiency studies including a user study. Results suggest that this work promises to support designing 3DGeoVSs based on SSI that are more effective and efficient and that can exploit IReps effectively and efficiently. This work presents a template software architecture and key building blocks for building novel IT solutions and applications for geodata, e.g., as components of spatial data infrastructures.}, language = {en} } @phdthesis{Gruetze2018, author = {Gr{\"u}tze, Toni}, title = {Adding value to text with user-generated content}, school = {Universit{\"a}t Potsdam}, pages = {ii, 114}, year = {2018}, abstract = {In recent years, the ever-growing amount of documents on the Web as well as in closed systems for private or business contexts led to a considerable increase of valuable textual information about topics, events, and entities. It is a truism that the majority of information (i.e., business-relevant data) is only available in unstructured textual form. The text mining research field comprises various practice areas that have the common goal of harvesting high-quality information from textual data. These information help addressing users' information needs. In this thesis, we utilize the knowledge represented in user-generated content (UGC) originating from various social media services to improve text mining results. These social media platforms provide a plethora of information with varying focuses. In many cases, an essential feature of such platforms is to share relevant content with a peer group. Thus, the data exchanged in these communities tend to be focused on the interests of the user base. The popularity of social media services is growing continuously and the inherent knowledge is available to be utilized. We show that this knowledge can be used for three different tasks. Initially, we demonstrate that when searching persons with ambiguous names, the information from Wikipedia can be bootstrapped to group web search results according to the individuals occurring in the documents. We introduce two models and different means to handle persons missing in the UGC source. We show that the proposed approaches outperform traditional algorithms for search result clustering. Secondly, we discuss how the categorization of texts according to continuously changing community-generated folksonomies helps users to identify new information related to their interests. We specifically target temporal changes in the UGC and show how they influence the quality of different tag recommendation approaches. Finally, we introduce an algorithm to attempt the entity linking problem, a necessity for harvesting entity knowledge from large text collections. The goal is the linkage of mentions within the documents with their real-world entities. A major focus lies on the efficient derivation of coherent links. For each of the contributions, we provide a wide range of experiments on various text corpora as well as different sources of UGC. The evaluation shows the added value that the usage of these sources provides and confirms the appropriateness of leveraging user-generated content to serve different information needs.}, language = {en} } @phdthesis{Kruse2018, author = {Kruse, Sebastian}, title = {Scalable data profiling}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-412521}, school = {Universit{\"a}t Potsdam}, pages = {ii, 156}, year = {2018}, abstract = {Data profiling is the act of extracting structural metadata from datasets. Structural metadata, such as data dependencies and statistics, can support data management operations, such as data integration and data cleaning. Data management often is the most time-consuming activity in any data-related project. Its support is extremely valuable in our data-driven world, so that more time can be spent on the actual utilization of the data, e. g., building analytical models. In most scenarios, however, structural metadata is not given and must be extracted first. Therefore, efficient data profiling methods are highly desirable. Data profiling is a computationally expensive problem; in fact, most dependency discovery problems entail search spaces that grow exponentially in the number of attributes. To this end, this thesis introduces novel discovery algorithms for various types of data dependencies - namely inclusion dependencies, conditional inclusion dependencies, partial functional dependencies, and partial unique column combinations - that considerably improve over state-of-the-art algorithms in terms of efficiency and that scale to datasets that cannot be processed by existing algorithms. The key to those improvements are not only algorithmic innovations, such as novel pruning rules or traversal strategies, but also algorithm designs tailored for distributed execution. While distributed data profiling has been mostly neglected by previous works, it is a logical consequence on the face of recent hardware trends and the computational hardness of dependency discovery. To demonstrate the utility of data profiling for data management, this thesis furthermore presents Metacrate, a database for structural metadata. Its salient features are its flexible data model, the capability to integrate various kinds of structural metadata, and its rich metadata analytics library. We show how to perform a data anamnesis of unknown, complex datasets based on this technology. In particular, we describe in detail how to reconstruct the schemata and assess their quality as part of the data anamnesis. The data profiling algorithms and Metacrate have been carefully implemented, integrated with the Metanome data profiling tool, and are available as free software. In that way, we intend to allow for easy repeatability of our research results and also provide them for actual usage in real-world data-related projects.}, language = {en} } @book{ReschkeTaeumelPapeetal.2018, author = {Reschke, Jakob and Taeumel, Marcel and Pape, Tobias and Niephaus, Fabio and Hirschfeld, Robert}, title = {Towards version control in object-based systems}, volume = {121}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-430-2}, issn = {1613-5652}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-410812}, publisher = {Universit{\"a}t Potsdam}, pages = {100}, year = {2018}, abstract = {Version control is a widely used practice among software developers. It reduces the risk of changing their software and allows them to manage different configurations and to collaborate with others more efficiently. This is amplified by code sharing platforms such as GitHub or Bitbucket. Most version control systems track files (e.g., Git, Mercurial, and Subversion do), but some programming environments do not operate on files, but on objects instead (many Smalltalk implementations do). Users of such environments want to use version control for their objects anyway. Specialized version control systems, such as the ones available for Smalltalk systems (e.g., ENVY/Developer and Monticello), focus on a small subset of objects that can be versioned. Most of these systems concentrate on the tracking of methods, classes, and configurations of these. Other user-defined and user-built objects are either not eligible for version control at all, tracking them involves complicated workarounds, or a fixed, domain-unspecific serialization format is used that does not equally suit all kinds of objects. Moreover, these version control systems that are specific to a programming environment require their own code sharing platforms; popular, well-established platforms for file-based version control systems cannot be used or adapter solutions need to be implemented and maintained. To improve the situation for version control of arbitrary objects, a framework for tracking, converting, and storing of objects is presented in this report. It allows editions of objects to be stored in an exchangeable, existing backend version control system. The platforms of the backend version control system can thus be reused. Users and objects have control over how objects are captured for the purpose of version control. Domain-specific requirements can be implemented. The storage format (i.e. the file format, when file-based backend version control systems are used) can also vary from one object to another. Different editions of objects can be compared and sets of changes can be applied to graphs of objects. A generic way for capturing and restoring that supports most kinds of objects is described. It models each object as a collection of slots. Thus, users can begin to track their objects without first having to implement version control supplements for their own kinds of objects. The proposed architecture is evaluated using a prototype implementation that can be used to track objects in Squeak/Smalltalk with Git. The prototype improves the suboptimal standing of user objects with respect to version control described above and also simplifies some version control tasks for classes and methods as well. It also raises new problems, which are discussed in this report as well.}, language = {en} } @phdthesis{Zuo2017, author = {Zuo, Zhe}, title = {From unstructured to structured: Context-based named entity mining from text}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-412576}, school = {Universit{\"a}t Potsdam}, pages = {vii, 112}, year = {2017}, abstract = {With recent advances in the area of information extraction, automatically extracting structured information from a vast amount of unstructured textual data becomes an important task, which is infeasible for humans to capture all information manually. Named entities (e.g., persons, organizations, and locations), which are crucial components in texts, are usually the subjects of structured information from textual documents. Therefore, the task of named entity mining receives much attention. It consists of three major subtasks, which are named entity recognition, named entity linking, and relation extraction. These three tasks build up an entire pipeline of a named entity mining system, where each of them has its challenges and can be employed for further applications. As a fundamental task in the natural language processing domain, studies on named entity recognition have a long history, and many existing approaches produce reliable results. The task is aiming to extract mentions of named entities in text and identify their types. Named entity linking recently received much attention with the development of knowledge bases that contain rich information about entities. The goal is to disambiguate mentions of named entities and to link them to the corresponding entries in a knowledge base. Relation extraction, as the final step of named entity mining, is a highly challenging task, which is to extract semantic relations between named entities, e.g., the ownership relation between two companies. In this thesis, we review the state-of-the-art of named entity mining domain in detail, including valuable features, techniques, evaluation methodologies, and so on. Furthermore, we present two of our approaches that focus on the named entity linking and relation extraction tasks separately. To solve the named entity linking task, we propose the entity linking technique, BEL, which operates on a textual range of relevant terms and aggregates decisions from an ensemble of simple classifiers. Each of the classifiers operates on a randomly sampled subset of the above range. In extensive experiments on hand-labeled and benchmark datasets, our approach outperformed state-of-the-art entity linking techniques, both in terms of quality and efficiency. For the task of relation extraction, we focus on extracting a specific group of difficult relation types, business relations between companies. These relations can be used to gain valuable insight into the interactions between companies and perform complex analytics, such as predicting risk or valuating companies. Our semi-supervised strategy can extract business relations between companies based on only a few user-provided seed company pairs. By doing so, we also provide a solution for the problem of determining the direction of asymmetric relations, such as the ownership_of relation. We improve the reliability of the extraction process by using a holistic pattern identification method, which classifies the generated extraction patterns. Our experiments show that we can accurately and reliably extract new entity pairs occurring in the target relation by using as few as five labeled seed pairs.}, language = {en} } @phdthesis{Zieger2017, author = {Zieger, Tobias}, title = {Self-adaptive data quality}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-410573}, school = {Universit{\"a}t Potsdam}, pages = {vii, 125}, year = {2017}, abstract = {Carrying out business processes successfully is closely linked to the quality of the data inventory in an organization. Lacks in data quality lead to problems: Incorrect address data prevents (timely) shipments to customers. Erroneous orders lead to returns and thus to unnecessary effort. Wrong pricing forces companies to miss out on revenues or to impair customer satisfaction. If orders or customer records cannot be retrieved, complaint management takes longer. Due to erroneous inventories, too few or too much supplies might be reordered. A special problem with data quality and the reason for many of the issues mentioned above are duplicates in databases. Duplicates are different representations of same real-world objects in a dataset. However, these representations differ from each other and are for that reason hard to match by a computer. Moreover, the number of required comparisons to find those duplicates grows with the square of the dataset size. To cleanse the data, these duplicates must be detected and removed. Duplicate detection is a very laborious process. To achieve satisfactory results, appropriate software must be created and configured (similarity measures, partitioning keys, thresholds, etc.). Both requires much manual effort and experience. This thesis addresses automation of parameter selection for duplicate detection and presents several novel approaches that eliminate the need for human experience in parts of the duplicate detection process. A pre-processing step is introduced that analyzes the datasets in question and classifies their attributes semantically. Not only do these annotations help understanding the respective datasets, but they also facilitate subsequent steps, for example, by selecting appropriate similarity measures or normalizing the data upfront. This approach works without schema information. Following that, we show a partitioning technique that strongly reduces the number of pair comparisons for the duplicate detection process. The approach automatically finds particularly suitable partitioning keys that simultaneously allow for effective and efficient duplicate retrieval. By means of a user study, we demonstrate that this technique finds partitioning keys that outperform expert suggestions and additionally does not need manual configuration. Furthermore, this approach can be applied independently of the attribute types. To measure the success of a duplicate detection process and to execute the described partitioning approach, a gold standard is required that provides information about the actual duplicates in a training dataset. This thesis presents a technique that uses existing duplicate detection results and crowdsourcing to create a near gold standard that can be used for the purposes above. Another part of the thesis describes and evaluates strategies how to reduce these crowdsourcing costs and to achieve a consensus with less effort.}, language = {en} } @phdthesis{Ussath2017, author = {Ussath, Martin Georg}, title = {Analytical approaches for advanced attacks}, school = {Universit{\"a}t Potsdam}, pages = {169}, year = {2017}, language = {en} }