@phdthesis{Hoellerer2016, author = {H{\"o}llerer, Reinhard}, title = {Modellierung und Optimierung von B{\"u}rgerdiensten am Beispiel der Stadt Landshut}, doi = {10.25932/publishup-42598}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-425986}, school = {Universit{\"a}t Potsdam}, pages = {xii, 244}, year = {2016}, abstract = {Die Projektierung und Abwicklung sowie die statische und dynamische Analyse von Gesch{\"a}ftsprozessen im Bereich des Verwaltens und Regierens auf kommunaler, L{\"a}nder- wie auch Bundesebene mit Hilfe von Informations- und Kommunikationstechniken besch{\"a}ftigen Politiker und Strategen f{\"u}r Informationstechnologie ebenso wie die {\"O}ffentlichkeit seit Langem. Der hieraus entstandene Begriff E-Government wurde in der Folge aus den unterschiedlichsten technischen, politischen und semantischen Blickrichtungen beleuchtet. Die vorliegende Arbeit konzentriert sich dabei auf zwei Schwerpunktthemen: • Das erste Schwerpunktthema behandelt den Entwurf eines hierarchischen Architekturmodells, f{\"u}r welches sieben hierarchische Schichten identifiziert werden k{\"o}nnen. Diese erscheinen notwendig, aber auch hinreichend, um den allgemeinen Fall zu beschreiben. Den Hintergrund hierf{\"u}r liefert die langj{\"a}hrige Prozess- und Verwaltungserfahrung als Leiter der EDV-Abteilung der Stadtverwaltung Landshut, eine kreisfreie Stadt mit rund 69.000 Einwohnern im Nordosten von M{\"u}nchen. Sie steht als Repr{\"a}sentant f{\"u}r viele Verwaltungsvorg{\"a}nge in der Bundesrepublik Deutschland und ist dennoch als Analyseobjekt in der Gesamtkomplexit{\"a}t und Prozessquantit{\"a}t {\"u}berschaubar. Somit k{\"o}nnen aus der Analyse s{\"a}mtlicher Kernabl{\"a}ufe statische und dynamische Strukturen extrahiert und abstrakt modelliert werden. Die Schwerpunkte liegen in der Darstellung der vorhandenen Bedienabl{\"a}ufe in einer Kommune. Die Transformation der Bedienanforderung in einem hierarchischen System, die Darstellung der Kontroll- und der Operationszust{\"a}nde in allen Schichten wie auch die Strategie der Fehlererkennung und Fehlerbehebung schaffen eine transparente Basis f{\"u}r umfassende Restrukturierungen und Optimierungen. F{\"u}r die Modellierung wurde FMC-eCS eingesetzt, eine am Hasso-Plattner-Institut f{\"u}r Softwaresystemtechnik GmbH (HPI) im Fachgebiet Kommunikationssysteme entwickelte Methodik zur Modellierung zustandsdiskreter Systeme unter Ber{\"u}cksichtigung m{\"o}glicher Inkonsistenzen (Betreuer: Prof. Dr.-Ing. Werner Zorn [ZW07a, ZW07b]). • Das zweite Schwerpunktthema widmet sich der quantitativen Modellierung und Optimierung von E-Government-Bediensystemen, welche am Beispiel des B{\"u}rgerb{\"u}ros der Stadt Landshut im Zeitraum 2008 bis 2015 durchgef{\"u}hrt wurden. Dies erfolgt auf Basis einer kontinuierlichen Betriebsdatenerfassung mit aufwendiger Vorverarbeitung zur Extrahierung mathematisch beschreibbarer Wahrscheinlichkeitsverteilungen. Der hieraus entwickelte Dienstplan wurde hinsichtlich der erzielbaren Optimierungen im dauerhaften Echteinsatz verifiziert. [ZW07a] Zorn, Werner: «FMC-QE A New Approach in Quantitative Modeling», Vortrag anl{\"a}sslich: MSV'07- The 2007 International Conference on Modeling, Simulation and Visualization Methods WorldComp2007, Las Vegas, 28.6.2007. 
[ZW07b] Zorn, Werner: «FMC-QE, A New Approach in Quantitative Modeling», Ver{\"o}ffentlichung, Hasso-Plattner-Institut f{\"u}r Softwaresystemtechnik an der Universit{\"a}t Potsdam, 28.6.2007.}, language = {de} } @phdthesis{Mueller2016, author = {M{\"u}ller, Stephan Heinz}, title = {Aggregates Caching for Enterprise Applications}, school = {Universit{\"a}t Potsdam}, pages = {167}, year = {2016}, abstract = {The introduction of columnar in-memory databases, along with hardware evolution, has made the execution of transactional and analytical enterprise application workloads on a single system both feasible and viable.
Yet, we argue that executing analytical aggregate queries directly on the transactional data can decrease the overall system performance. Despite the aggregation capabilities of columnar in-memory databases, the direct access to records of a materialized aggregate is always more efficient than aggregating on the fly. The traditional approach to materialized aggregates, however, introduces significant overhead in terms of materialized view selection, maintenance, and exploitation. When this overhead is handled by the application, it increases the application complexity and can slow down the transactional throughput of inserts, updates, and deletes. In this thesis, we motivate, propose, and evaluate the aggregate cache, a materialized aggregate engine in the main-delta architecture of a columnar in-memory database that provides efficient means to handle costly aggregate queries of enterprise applications. For our design, we leverage the specifics of the main-delta architecture that separates a table into a main and a delta partition. The central concept is to cache only the partial aggregate query result as defined on the main partition of a table, because the main partition is relatively stable as records are only inserted into the delta partition. We contribute by proposing incremental aggregate maintenance and query compensation techniques for mixed workloads of enterprise applications. In addition, we introduce aggregate profit metrics that increase the likelihood of persisting the most profitable aggregates in the aggregate cache. Query compensation and maintenance of materialized aggregates based on joins of multiple tables are expensive due to the partitioned tables in the main-delta architecture. Our analysis of enterprise applications has revealed several data schema and workload patterns. This includes the observation that transactional data is persisted in header and item tables, whereas in many cases, the insertion of related header and item records is executed in a single database transaction. We contribute by proposing an approach to transport these application object semantics to the database system and to optimize the query processing using the aggregate cache by applying partition pruning and predicate pushdown techniques. For the experimental evaluation, we propose the FICO benchmark that is based on data from a productive ERP system with extracted mixed workloads. Our evaluation reveals that the aggregate cache can accelerate the execution of aggregate queries by up to a factor of 60, although the speedup depends strongly on the number of aggregated records in the main and delta partitions. In mixed workloads, the proposed aggregate maintenance and query compensation techniques perform up to an order of magnitude better than traditional materialized aggregate maintenance approaches. The introduced aggregate profit metrics outperform existing cost-based metrics by up to 20\%. Lastly, the join pruning and predicate pushdown techniques can accelerate query execution in the aggregate cache in the presence of multiple partitioned tables by up to an order of magnitude.}, language = {en} } @phdthesis{Dick2016, author = {Dick, Uwe}, title = {Discriminative Classification Models for Internet Security}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-102593}, school = {Universit{\"a}t Potsdam}, pages = {x, 57}, year = {2016}, abstract = {Services that operate over the Internet are under constant threat of being exposed to fraudulent use.
Maintaining a good user experience for legitimate users often requires the classification of entities as malicious or legitimate in order to initiate countermeasures. As an example, inbound email spam filters decide between spam and non-spam. They can base their decision both on the content of each email and on features that summarize prior emails received from the sending server. In general, discriminative classification methods learn to distinguish positive from negative entities. Each decision for a label may be based on features of the entity and related entities. When labels of related entities have strong interdependencies---as can be assumed e.g. for emails being delivered by the same user---classification decisions should not be made independently and dependencies should be modeled in the decision function. This thesis addresses the formulation of discriminative classification problems that are tailored for the specific demands of the following three Internet security applications. Theoretical and algorithmic solutions are devised to protect an email service against flooding of user inboxes, to mitigate abusive usage of outbound email servers, and to protect web servers against distributed denial of service attacks. In the application of filtering an inbound email stream for unsolicited emails, utilizing features that go beyond each individual email's content can be valuable. Information about each sending mail server can be aggregated over time and may help in identifying unwanted emails. However, while this information will be available to the deployed email filter, some parts of the training data that are compiled by third-party providers may not contain this information. The missing features have to be estimated at training time in order to learn a classification model. In this thesis an algorithm is derived that learns a decision function that integrates over a distribution of values for each missing entry. The distribution of missing values is a free parameter that is optimized to learn an optimal decision function. The outbound stream of emails of an email service provider can be separated by the customer IDs that ask for delivery. All emails that are sent by the same ID in the same period of time are related, both in content and in label. Hijacked customer accounts may send batches of unsolicited emails to other email providers, which in turn might blacklist the sender's email servers after detection of incoming spam emails. The risk of being blocked from further delivery depends on the rate of outgoing unwanted emails and the duration of high spam sending rates. An optimization problem is developed that minimizes the expected cost for the email provider by learning a decision function that assigns a limit on the sending rate to customers based on each customer's email stream. Identifying attacking IPs during HTTP-level DDoS attacks allows those IPs to be blocked from further accessing the web servers. DDoS attacks are usually carried out by infected clients that are members of the same botnet and show similar traffic patterns. HTTP-level attacks aim at exhausting one or more resources of the web server infrastructure, such as CPU time. If the joint set of attackers cannot increase resource usage close to the maximum capacity, no effect will be experienced by legitimate users of hosted web sites. However, if the additional load raises the computational burden towards the critical range, user experience will degrade until the service may become unavailable altogether.
As the loss incurred by missing one attacker depends on the blocking decisions for other attackers---if most other attackers are detected, not blocking one client will likely not be harmful---a structured output model has to be learned. In this thesis an algorithm is developed that learns a structured prediction decoder that searches the space of label assignments, guided by a policy. Each model is evaluated on real-world data and is compared to reference methods. The results show that modeling each classification problem according to the specific demands of the task improves performance over solutions that do not consider the constraints inherent to an application.}, language = {en} } @phdthesis{Weidling2016, author = {Weidling, Stefan}, title = {Neue Ans{\"a}tze zur Verbesserung der Fehlertoleranz gegen{\"u}ber transienten Fehlern in sequentiellen Schaltungen}, school = {Universit{\"a}t Potsdam}, pages = {XII, 181}, year = {2016}, language = {de} } @phdthesis{Niess2016, author = {Nieß, G{\"u}nther}, title = {Modellierung und Erkennung von technischen Fehlern mittels linearer und nichtlinearer Codes}, school = {Universit{\"a}t Potsdam}, pages = {V, 97}, year = {2016}, language = {de} } @phdthesis{Schindler2016, author = {Schindler, Sven}, title = {Honeypot Architectures for IPv6 Networks}, school = {Universit{\"a}t Potsdam}, pages = {164}, year = {2016}, language = {en} } @phdthesis{Mueller2016, author = {Mueller, Stefanie}, title = {Interacting with personal fabrication devices}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-100908}, school = {Universit{\"a}t Potsdam}, pages = {xxi, 108}, year = {2016}, abstract = {Personal fabrication tools, such as 3D printers, are on the way to enabling a future in which non-technical users will be able to create custom objects. However, while the hardware is there, the current interaction model behind existing design tools is not suitable for non-technical users. Today, 3D printers are operated by fabricating the object in one go, which tends to run overnight due to the slow 3D printing technology. Consequently, the current interaction model requires users to think carefully before printing, as every mistake may imply another overnight print. Planning every step ahead, however, is not feasible for non-technical users as they lack the experience to reason about the consequences of their design decisions. In this dissertation, we propose changing the interaction model around personal fabrication tools to better serve this user group. We draw inspiration from personal computing and argue that the evolution of personal fabrication may resemble the evolution of personal computing: Computing started with machines that executed a program in one go before returning the result to the user. By decreasing the interaction unit to single requests, turn-taking systems such as the command line evolved, which provided users with feedback after every input. Finally, with the introduction of direct-manipulation interfaces, users continuously interacted with a program, receiving feedback about every action in real time. In this dissertation, we explore whether these interaction concepts can be applied to personal fabrication as well. We start with fabricating an object in one go and investigate how to tighten the feedback cycle at the object level: We contribute a method called low-fidelity fabrication, which saves up to 90\% of the fabrication time by creating objects as fast low-fidelity previews, which are sufficient to evaluate key design aspects.
Depending on what is currently being tested, we propose different conversions that enable users to focus on different parts: faBrickator allows for a modular design in the early stages of prototyping; when users move on, WirePrint allows quickly testing an object's shape, while Platener allows testing an object's technical function. We present an interactive editor for each technique and explain the underlying conversion algorithms. By interacting on smaller units, such as a single element of an object, we explore what it means to transition from systems that fabricate objects in one go to turn-taking systems. We start with a 2D system called constructable: Users draw with a laser pointer onto the workpiece inside a laser cutter. The drawing is captured with an overhead camera. As soon as the user finishes drawing an element, such as a line, the constructable system beautifies the path and cuts it--resulting in physical output after every editing step. We extend constructable towards 3D editing by developing a novel laser-cutting technique for 3D objects called LaserOrigami that works by heating up the workpiece with the defocused laser until the material becomes compliant and bends down under gravity. While constructable and LaserOrigami allow for fast physical feedback, the interaction is still best described as turn-taking since it consists of two discrete steps: users first create an input and afterwards the system provides physical output. By decreasing the interaction unit even further to a single feature, we can achieve real-time physical feedback: Input by the user and output by the fabrication device are so tightly coupled that no visible lag exists. This allows us to explore what it means to transition from turn-taking interfaces, which only allow exploring one option at a time, to direct manipulation interfaces with real-time physical feedback, which allow users to explore the entire space of options continuously with a single interaction. We present a system called FormFab, which allows for such direct control. FormFab is based on the same principle as LaserOrigami: It uses a workpiece that, when warmed up, becomes compliant and can be reshaped. However, FormFab achieves the reshaping not based on gravity, but through a pneumatic system that users can control interactively. As users interact, they see the shape change in real time. We conclude this dissertation by extrapolating the current evolution into a future in which large numbers of people use the new technology to create objects. We see two additional challenges on the horizon: sustainability and intellectual property. We investigate sustainability by demonstrating how to print less and instead patch physical objects. We explore questions around intellectual property with a system called Scotty that transfers objects without creating duplicates, thereby preserving the designer's copyright.}, language = {en} } @phdthesis{Kilic2016, author = {Kilic, Mukayil}, title = {Vernetztes Pr{\"u}fen von elektronischen Komponenten {\"u}ber das Internet}, school = {Universit{\"a}t Potsdam}, pages = {104, XVI}, year = {2016}, language = {de} } @phdthesis{Prasse2016, author = {Prasse, Paul}, title = {Pattern recognition for computer security}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-100251}, school = {Universit{\"a}t Potsdam}, pages = {VI, 75}, year = {2016}, abstract = {Computer Security deals with the detection and mitigation of threats to computer networks, data, and computing hardware.
This thesis addresses the following two computer security problems: email spam campaign detection and malware detection. Email spam campaigns can easily be generated using popular dissemination tools by specifying simple grammars that serve as message templates. A grammar is disseminated to the nodes of a botnet; the nodes create messages by instantiating the grammar at random. Email spam campaigns can encompass huge data volumes and therefore pose a threat to the stability of the infrastructure of email service providers that have to store them. Malware (software that serves a malicious purpose) affects web servers, client computers via active content, and client computers through executable files. Without the help of malware detection systems it would be easy for malware creators to collect sensitive information or to infiltrate computers. The detection of threats (such as email-spam messages, phishing messages, or malware) is an adversarial and therefore intrinsically difficult problem. Threats vary greatly and evolve over time. The detection of threats based on manually designed rules is therefore difficult and requires a constant engineering effort. Machine learning is a research area that revolves around the analysis of data and the discovery of patterns that describe aspects of the data. Discriminative learning methods extract prediction models from data that are optimized to predict a target attribute as accurately as possible. Machine-learning methods hold the promise of automatically identifying patterns that robustly and accurately detect threats. This thesis focuses on the design and analysis of discriminative learning methods for the two computer-security problems under investigation: email-campaign and malware detection. The first part of this thesis addresses email-campaign detection. We focus on regular expressions as a syntactic framework, because regular expressions are intuitively comprehensible by security engineers and administrators, and they can be applied as a detection mechanism in an extremely efficient manner. In this setting, a prediction model is provided with exemplary messages from an email-spam campaign. The prediction model has to generate a regular expression that reveals the syntactic pattern that underlies the entire campaign, and that a security engineer finds comprehensible and feels confident enough to use for blacklisting further messages at the email server. We model this problem as a two-stage learning problem with structured input and output spaces, which can be solved using standard cutting-plane methods. To this end, we develop an appropriate loss function and derive a decoder for the resulting optimization problem. The second part of this thesis deals with the problem of predicting whether a given JavaScript or PHP file is malicious or benign. Recent malware analysis techniques use static or dynamic features, or both. In fully dynamic analysis, the software or script is executed and observed for malicious behavior in a sandbox environment. By contrast, static analysis is based on features that can be extracted directly from the program file. In order to bypass static detection mechanisms, code obfuscation techniques are used to spread a malicious program file in many different syntactic variants.
Code that is deobfuscated before a static classifier is applied can be subjected to mostly static code analysis, which overcomes the problem of obfuscated malicious code but increases the computational costs of malware detection by an order of magnitude. In this thesis we present a cascaded architecture in which a classifier first performs a static analysis of the original code and, based on the outcome of this first classification step, the code may be deobfuscated and classified again. We explore several types of features, including token \$n\$-grams, orthogonal sparse bigrams, subroutine hashings, and syntax-tree features, and study the robustness of detection methods and feature types against the evolution of malware over time. The developed tool scans very large file collections quickly and accurately. Each model is evaluated on real-world data and compared to reference methods. Our approach of inferring regular expressions to filter emails belonging to an email spam campaign leads to models with a high true-positive rate and a very low false-positive rate that is an order of magnitude lower than that of a commercial content-based filter. Our system, REx-SVMshort, is being used by a commercial email service provider and complements content-based and IP-address-based filtering. Our cascaded malware detection system is evaluated on a high-quality data set of almost 400,000 conspicuous PHP files and a collection of more than 1,00,000 JavaScript files. From our case study we can conclude that our system can quickly and accurately process large data collections at a low false-positive rate.}, language = {en} }