@article{TorkuraSukmanaChengetal.2020, author = {Torkura, Kennedy A. and Sukmana, Muhammad Ihsan Haikal and Cheng, Feng and Meinel, Christoph}, title = {CloudStrike}, series = {IEEE access : practical research, open solutions}, volume = {8}, journal = {IEEE access : practical research, open solutions}, publisher = {Institute of Electrical and Electronics Engineers}, address = {Piscataway}, issn = {2169-3536}, doi = {10.1109/ACCESS.2020.3007338}, pages = {123044 -- 123060}, year = {2020}, abstract = {Most cyber-attacks and data breaches in cloud infrastructure are due to human errors and misconfiguration vulnerabilities. Cloud customer-centric tools are imperative for mitigating these issues; however, existing cloud security models are largely unable to tackle these security challenges. Therefore, novel security mechanisms are imperative; we propose Risk-driven Fault Injection (RDFI) techniques to address these challenges. RDFI applies the principles of chaos engineering to cloud security and leverages feedback loops to execute, monitor, analyze and plan security fault injection campaigns, based on a knowledge-base. The knowledge-base consists of fault models designed from secure baselines, cloud security best practices and observations derived during iterative fault injection campaigns. These observations are helpful for identifying vulnerabilities while verifying the correctness of security attributes (integrity, confidentiality and availability). Furthermore, RDFI proactively supports risk analysis and security hardening efforts by sharing security information with security mechanisms. We have designed and implemented the RDFI strategies, including various chaos engineering algorithms, as a software tool: CloudStrike. Several evaluations have been conducted with CloudStrike against infrastructure deployed on two major public cloud infrastructures: Amazon Web Services and Google Cloud Platform. The time performance increases linearly, proportional to increasing attack rates. Also, the analysis of vulnerabilities detected via security fault injection has been used to harden the security of cloud resources to demonstrate the effectiveness of the security information provided by CloudStrike. Therefore, we opine that our approaches are suitable for overcoming contemporary cloud security issues.}, language = {en} } @article{GruenerMuehleMeinel2021, author = {Gr{\"u}ner, Andreas and M{\"u}hle, Alexander and Meinel, Christoph}, title = {ATIB}, series = {IEEE access : practical research, open solutions / Institute of Electrical and Electronics Engineers}, volume = {9}, journal = {IEEE access : practical research, open solutions / Institute of Electrical and Electronics Engineers}, publisher = {Institute of Electrical and Electronics Engineers}, address = {New York, NY}, issn = {2169-3536}, doi = {10.1109/ACCESS.2021.3116095}, pages = {138553 -- 138570}, year = {2021}, abstract = {Identity management is a principal component of securing online services. In the advancement of traditional identity management patterns, the identity provider remained a Trusted Third Party (TTP). The service provider and the user need to trust a particular identity provider for correct attributes amongst other demands. This paradigm changed with the invention of blockchain-based Self-Sovereign Identity (SSI) solutions that primarily focus on the users. SSI reduces the functional scope of the identity provider to an attribute provider while enabling attribute aggregation.
Besides that, the development of new protocols, disregarding established protocols, and a significantly fragmented landscape of SSI solutions pose considerable challenges for adoption by service providers. We propose an Attribute Trust-enhancing Identity Broker (ATIB) to leverage the potential of SSI for trust-enhancing attribute aggregation. Furthermore, ATIB abstracts from a dedicated SSI solution and offers standard protocols. Therefore, it facilitates the adoption by service providers. Despite the brokered integration approach, we show that ATIB provides a high security posture. Additionally, ATIB does not compromise the ten foundational SSI principles for the users.}, language = {en} } @book{MeinelWillemsStaubitzetal.2022, author = {Meinel, Christoph and Willems, Christian and Staubitz, Thomas and Sauer, Dominic and Hagedorn, Christiane}, title = {openHPI}, number = {150}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-546-0}, issn = {1613-5652}, doi = {10.25932/publishup-56179}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-561792}, publisher = {Universit{\"a}t Potsdam}, pages = {86}, year = {2022}, abstract = {Anl{\"a}sslich des 10-j{\"a}hrigen Jubil{\"a}ums von openHPI informiert dieser technische Bericht {\"u}ber die HPI-MOOC-Plattform einschließlich ihrer Kernfunktionen, Technologie und Architektur. In einer Einleitung wird die Plattformfamilie mit allen Partnerplattformen vorgestellt; diese belaufen sich inklusive openHPI aktuell auf neun Plattformen. In diesem Abschnitt wird außerdem gezeigt, wie openHPI als Berater und Forschungspartner in verschiedenen Projekten fungiert. Im zweiten Kapitel werden die Funktionalit{\"a}ten und g{\"a}ngigen Kursformate der Plattform pr{\"a}sentiert. Die Funktionalit{\"a}ten sind in Lerner- und Admin-Funktionen unterteilt. Der Bereich Lernerfunktionen bietet detaillierte Informationen zu Leistungsnachweisen, Kursen und den Lernmaterialien, aus denen sich ein Kurs zusammensetzt: Videos, Texte und Quiz. Dar{\"u}ber hinaus k{\"o}nnen die Lernmaterialien durch externe {\"U}bungstools angereichert werden, die {\"u}ber den Standard Learning Tools Interoperability (LTI) mit der HPI MOOC-Plattform kommunizieren. Das Konzept der Peer-Assessments rundet die m{\"o}glichen Lernmaterialien ab. Der Abschnitt geht dann weiter auf das Diskussionsforum ein, das einen grundlegenden Unterschied von MOOCs im Vergleich zu traditionellen E-Learning-Angeboten darstellt. Zum Abschluss des Abschnitts folgt eine Beschreibung von Quiz-Recap, Lernzielen, mobilen Anwendungen, spielerischem Lernen und dem Helpdesk. Der n{\"a}chste Teil dieses Kapitels besch{\"a}ftigt sich mit den Admin-Funktionen. Die Funktionalit{\"a}tsbeschreibung beschr{\"a}nkt sich auf Neuigkeiten und Ank{\"u}ndigungen, Dashboards und Statistiken, Berichtsfunktionen, Forschungsoptionen mit A/B-Tests, den Kurs-Feed und das TransPipe-Tool zur Unterst{\"u}tzung beim Erstellen von automatischen oder manuellen Untertiteln. Die Plattform unterst{\"u}tzt außerdem eine Vielzahl zus{\"a}tzlicher Funktionen, doch eine detaillierte Beschreibung dieser Funktionen w{\"u}rde den Rahmen des Berichts sprengen. Das Kapitel geht dann auf g{\"a}ngige Kursformate und openHPI-Lehrveranstaltungen am HPI ein, bevor es mit einigen Best Practices f{\"u}r die Gestaltung und Durchf{\"u}hrung von Kursen schließt. Zum Abschluss des technischen Berichts gibt das letzte Kapitel eine Zusammenfassung und einen Ausblick auf die Zukunft der digitalen Bildung.
Ein besonderes Merkmal des openHPI-Projekts ist die bewusste Entscheidung, die komplette Anwendung von den physischen Netzwerkkomponenten bis zur Plattformentwicklung eigenst{\"a}ndig zu betreiben. Bei der vorliegenden deutschen Variante handelt es sich um eine gek{\"u}rzte {\"U}bersetzung des technischen Berichts 148, bei der kein Einblick in die Technologien und Architektur von openHPI gegeben wird. Interessierte Leser:innen k{\"o}nnen im technischen Bericht 148 (vollst{\"a}ndige englische Version) detaillierte Informationen zum Rechenzentrum und den Ger{\"a}ten, der Cloud-Software und dem openHPI Cloud Service aber auch zu Infrastruktur-Anwendungskomponenten wie Entwicklungstools, Automatisierung, Deployment-Pipeline und Monitoring erhalten. Außerdem finden sich dort weitere Informationen {\"u}ber den Technologiestack und konkrete Implementierungsdetails der Plattform inklusive der serviceorientierten Ruby on Rails-Anwendung, die Kommunikation zwischen den Diensten, {\"o}ffentliche APIs, sowie Designsystem und -komponenten. Der Abschnitt schließt mit einer Diskussion {\"u}ber die urspr{\"u}ngliche Microservice-Architektur und die Migration zu einer monolithischen Anwendung.}, language = {de} } @incollection{CorazzaThienen2023, author = {Corazza, Giovanni Emanuele and Thienen, Julia von}, title = {Invention}, series = {The Palgrave encyclopedia of the possible}, booktitle = {The Palgrave encyclopedia of the possible}, editor = {Glăveanu, Vlad Petre}, publisher = {Springer International Publishing}, address = {Cham}, isbn = {978-3-030-90912-3}, doi = {10.1007/978-3-030-90913-0_14}, pages = {806 -- 814}, year = {2023}, abstract = {This entry addresses invention from five different perspectives: (i) definition of the term, (ii) mechanisms underlying invention processes, (iii) (pre-)history of human inventions, (iv) intellectual property protection vs open innovation, and (v) case studies of great inventors. Regarding the definition, an invention is the outcome of a creative process taking place within a technological milieu, which is recognized as successful in terms of its effectiveness as an original technology. In the process of invention, a technological possibility becomes realized. Inventions are distinct from either discovery or innovation. In human creative processes, seven mechanisms of invention can be observed, yielding characteristic outcomes: (1) basic inventions, (2) invention branches, (3) invention combinations, (4) invention toolkits, (5) invention exaptations, (6) invention values, and (7) game-changing inventions. The development of humanity has been strongly shaped by inventions ever since early stone tools and the conception of agriculture. An "explosion of creativity" has been associated with Homo sapiens, and inventions in all fields of human endeavor have followed suit, engendering an exponential growth of cumulative culture. This culture development emerges essentially through a reuse of previous inventions, their revision, amendment and rededication. In sociocultural terms, humans have increasingly regulated processes of invention and invention-reuse through concepts such as intellectual property, patents, open innovation and licensing methods. 
Finally, three case studies of great inventors are considered: Edison, Marconi, and Montessori, alongside a discussion of human invention processes as collaborative endeavors.}, language = {en} } @phdthesis{Schnjakin2014, author = {Schnjakin, Maxim}, title = {Cloud-RAID}, pages = {137}, year = {2014}, language = {de} } @phdthesis{Wang2016, author = {Wang, Cheng}, title = {Deep Learning of Multimodal Representations}, school = {Universit{\"a}t Potsdam}, pages = {142}, year = {2016}, language = {en} } @phdthesis{Saleh2016, author = {Saleh, Eyad}, title = {Securing Multi-tenant SaaS Environments}, school = {Universit{\"a}t Potsdam}, pages = {108}, year = {2016}, abstract = {Software-as-a-Service (SaaS) offers several advantages to both service providers and users. Service providers can benefit from the reduction of Total Cost of Ownership (TCO), better scalability, and better resource utilization. On the other hand, users can use the service anywhere and anytime, and minimize upfront investment by following the pay-as-you-go model. Despite the benefits of SaaS, users still have concerns about the security and privacy of their data. Due to the nature of SaaS and the Cloud in general, the data and the computation are beyond the users' control, and hence data security becomes a vital factor in this new paradigm. Furthermore, in multi-tenant SaaS applications, the tenants become more concerned about the confidentiality of their data since several tenants are co-located onto a shared infrastructure. To address those concerns, we start protecting the data from the provisioning process by controlling how tenants are being placed in the infrastructure. We present a resource allocation algorithm, called SecPlace, designed to minimize the risk of co-resident tenants. It enables the SaaS provider to control the resource (i.e., database instance) allocation process while taking into account the security of tenants as a requirement. Due to the design principles of the multi-tenancy model, tenants follow some degree of sharing on both application and infrastructure levels. Thus, strong security isolation should be present. Therefore, we develop SignedQuery, a technique that prevents one tenant from accessing others' data. We use the Signing Concept to create a signature that is used to sign the tenant's request; the server then verifies the signature, recognizes the requesting tenant, and hence ensures that the data to be accessed belongs to the legitimate tenant. Finally, data confidentiality remains a critical concern due to the fact that data in the Cloud is out of users' premises, and hence beyond their control. Cryptography is increasingly proposed as a potential approach to address such a challenge. Therefore, we present SecureDB, a system designed to run SQL-based applications over an encrypted database. SecureDB captures the schema design and analyzes it to understand the internal structure of the data (i.e., relationships between the tables and their attributes). Moreover, we determine the appropriate partial homomorphic encryption scheme for each attribute where computation is possible even when the data is encrypted. To evaluate our work, we conduct extensive experiments with different settings. The main use case in our work is a popular open source HRM application, called OrangeHRM.
The results show that our multi-layered approach is practical, provides enhanced security and isolation among tenants, and has a moderate complexity in terms of processing encrypted data.}, language = {en} } @phdthesis{SadrAzodi2015, author = {Sadr-Azodi, Amir Shahab}, title = {Towards Real-time SIEM-based Network monitoring and Intrusion Detection through Advanced Event Normalization}, school = {Universit{\"a}t Potsdam}, pages = {144}, year = {2015}, language = {en} } @phdthesis{Malchow2019, author = {Malchow, Martin}, title = {Nutzerunterst{\"u}tzung und -Motivation in E-Learning Vorlesungsarchiven und MOOCs}, school = {Universit{\"a}t Potsdam}, pages = {142}, year = {2019}, abstract = {In den letzten Jahren ist die Aufnahme und Verbreitung von Videos immer einfacher geworden. Daher sind die Relevanz und Beliebtheit der Aufnahme von Vorlesungsvideos in den letzten Jahren stark angestiegen. Dies f{\"u}hrt zu einem großen Datenbestand an Vorlesungsvideos in den Video-Vorlesungsarchiven der Universit{\"a}ten. Durch diesen wachsenden Datenbestand wird es allerdings f{\"u}r die Studenten immer schwieriger, die relevanten Videos eines Vorlesungsarchivs aufzufinden. Zus{\"a}tzlich haben viele Lerninteressierte durch ihre allt{\"a}gliche Arbeit und famili{\"a}ren Verpflichtungen immer weniger Zeit, sich mit dem Lernen zu besch{\"a}ftigen. Ein weiterer Aspekt, der das Lernen im Internet erschwert, ist, dass es durch soziale Netzwerke und andere Online-Plattformen vielf{\"a}ltige Ablenkungsm{\"o}glichkeiten gibt. Daher ist das Ziel dieser Arbeit, M{\"o}glichkeiten aufzuzeigen, welche das E-Learning bieten kann, um Nutzer beim Lernprozess zu unterst{\"u}tzen und zu motivieren. Das Hauptkonzept zur Unterst{\"u}tzung der Studenten ist das pr{\"a}zise Auffinden von Informationen in den immer weiter wachsenden Vorlesungsvideoarchiven. Dazu werden die Vorlesungen im Voraus analysiert und die Texte der Vorlesungsfolien mit verschiedenen Methoden indexiert. Daraufhin k{\"o}nnen die Studenten mit der Suche oder dem Lecture-Butler Lerninhalte entsprechend ihres aktuellen Wissensstandes auffinden. Die m{\"o}glichen verwendeten Technologien f{\"u}r das Auffinden wurden sowohl technisch als auch durch Studentenumfragen erfolgreich evaluiert. Zur Motivation von Studenten in Vorlesungsarchiven werden diverse Konzepte betrachtet und die Umsetzung evaluiert, die den Studenten interaktiv in den Lernprozess einbeziehen. Neben Vorlesungsarchiven existieren sowohl im privaten als auch im dienstlichen Weiterbildungsbereich die in den letzten Jahren immer beliebter werdenden MOOCs. Generell sind die Abschlussquoten von MOOCs allerdings mit durchschnittlich 7\% eher gering. Daher werden Motivationsl{\"o}sungen f{\"u}r MOOCs im Bereich von eingebetteten Systemen betrachtet, die in praktischen Programmierkursen Anwendung finden. Zus{\"a}tzlich wurden Kurse evaluiert, welche die Programmierung von eingebetteten Systemen behandeln. Die Verf{\"u}gbarkeit war bei Kursen von bis zu 10.000 eingeschriebenen Teilnehmern hierbei kein schwerwiegendes Problem.
Die Verwendung von eingebetteten Systemen in Programmierkursen ist bei den Studenten in der praktischen Umsetzung auf sehr großes Interesse gestoßen.}, language = {de} } @article{BinTareafBergerHennigetal.2020, author = {Bin Tareaf, Raad and Berger, Philipp and Hennig, Patrick and Meinel, Christoph}, title = {Cross-platform personality exploration system for online social networks}, series = {Web intelligence}, volume = {18}, journal = {Web intelligence}, number = {1}, publisher = {IOS Press}, address = {Amsterdam}, issn = {2405-6456}, doi = {10.3233/WEB-200427}, pages = {35 -- 51}, year = {2020}, abstract = {Social networking sites (SNS) are a rich source of latent information about individual characteristics. Crawling and analyzing this content provides a new approach for enterprises to personalize services and put forward product recommendations. In the past few years, commercial brands have made a gradual appearance on social media platforms for advertisement, customer support and public relations purposes, and by now it has become a necessity throughout all branches. This online identity can be represented as a brand personality that reflects how a brand is perceived by its customers. We exploited recent research in text analysis and personality detection to build an automatic brand personality prediction model on top of the (Five-Factor Model) and (Linguistic Inquiry and Word Count) features extracted from publicly available benchmarks. Predictive evaluation on brands' accounts reveals that the Facebook platform provides a slight advantage over the Twitter platform in offering more self-disclosure for users to express their emotions, especially their demographic and psychological traits. Results also confirm the wider perspective that the same social media account carries quite similar and comparable personality scores over different social media platforms. For evaluating our prediction results on actual brands' accounts, we crawled the Facebook API and Twitter API respectively for 100k posts from the most valuable brands' pages in the USA and we visualize exemplars of comparison results and present suggestions for future directions.}, language = {en} } @phdthesis{Najafi2023, author = {Najafi, Pejman}, title = {Leveraging data science \& engineering for advanced security operations}, doi = {10.25932/publishup-61225}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-612257}, school = {Universit{\"a}t Potsdam}, pages = {xix, 180}, year = {2023}, abstract = {The Security Operations Center (SOC) represents a specialized unit responsible for managing security within enterprises. To aid in its responsibilities, the SOC relies heavily on a Security Information and Event Management (SIEM) system that functions as a centralized repository for all security-related data, providing a comprehensive view of the organization's security posture. Due to the ability to offer such insights, SIEMs are considered indispensable tools facilitating SOC functions, such as monitoring, threat detection, and incident response. Despite advancements in big data architectures and analytics, most SIEMs fall short of keeping pace. Architecturally, they function merely as log search engines, lacking the support for distributed large-scale analytics. Analytically, they rely on rule-based correlation, neglecting the adoption of more advanced data science and machine learning techniques.
This thesis first proposes a blueprint for next-generation SIEM systems that emphasize distributed processing and multi-layered storage to enable data mining at a big data scale. Next, with the architectural support, it introduces two data mining approaches for advanced threat detection as part of SOC operations. First, a novel graph mining technique that formulates threat detection within the SIEM system as a large-scale graph mining and inference problem, built on the principles of guilt-by-association and exempt-by-reputation. The approach entails the construction of a Heterogeneous Information Network (HIN) that models shared characteristics and associations among entities extracted from SIEM-related events/logs. Thereon, a novel graph-based inference algorithm is used to infer a node's maliciousness score based on its associations with other entities in the HIN. Second, an innovative outlier detection technique that imitates a SOC analyst's reasoning process to find anomalies/outliers. The approach emphasizes explainability and simplicity, achieved by combining the output of simple context-aware univariate submodels that calculate an outlier score for each entry. Both approaches were tested in academic and real-world settings, demonstrating high performance when compared to other algorithms as well as practicality alongside a large enterprise's SIEM system. This thesis establishes the foundation for next-generation SIEM systems that can enhance today's SOCs and facilitate the transition from human-centric to data-driven security operations.}, language = {en} } @article{Perscheid2021, author = {Perscheid, Cindy}, title = {Integrative biomarker detection on high-dimensional gene expression data sets}, series = {Briefings in bioinformatics}, volume = {22}, journal = {Briefings in bioinformatics}, number = {3}, publisher = {Oxford Univ. Press}, address = {Oxford}, issn = {1467-5463}, doi = {10.1093/bib/bbaa151}, pages = {18}, year = {2021}, abstract = {Gene expression data provide the expression levels of tens of thousands of genes from several hundred samples. These data are analyzed to detect biomarkers that can be of prognostic or diagnostic use. Traditionally, biomarker detection for gene expression data is the task of gene selection. The vast number of genes is reduced to a few relevant ones that achieve the best performance for the respective use case. Traditional approaches select genes based on their statistical significance in the data set. This results in issues of robustness, redundancy and true biological relevance of the selected genes. Integrative analyses typically address these shortcomings by integrating multiple data artifacts from the same objects, e.g. gene expression and methylation data. When only gene expression data are available, integrative analyses instead use curated information on biological processes from public knowledge bases. With knowledge bases providing an ever-increasing amount of curated biological knowledge, such prior knowledge approaches become more powerful. This paper provides a thorough overview on the status quo of biomarker detection on gene expression data with prior biological knowledge. 
We discuss current shortcomings of traditional approaches, review recent external knowledge bases, provide a classification and qualitative comparison of existing prior knowledge approaches and discuss open challenges for this kind of gene selection.}, language = {en} } @article{Perscheid2021, author = {Perscheid, Cindy}, title = {Comprior}, series = {BMC Bioinformatics}, volume = {22}, journal = {BMC Bioinformatics}, publisher = {Springer Nature}, address = {London}, issn = {1471-2105}, doi = {10.1186/s12859-021-04308-z}, pages = {1 -- 15}, year = {2021}, abstract = {Background Reproducible benchmarking is important for assessing the effectiveness of novel feature selection approaches applied on gene expression data, especially for prior knowledge approaches that incorporate biological information from online knowledge bases. However, no full-fledged benchmarking system exists that is extensible, provides built-in feature selection approaches, and a comprehensive result assessment encompassing classification performance, robustness, and biological relevance. Moreover, the particular needs of prior knowledge feature selection approaches, i.e. uniform access to knowledge bases, are not addressed. As a consequence, prior knowledge approaches are not evaluated amongst each other, leaving open questions regarding their effectiveness. Results We present the Comprior benchmark tool, which facilitates the rapid development and effortless benchmarking of feature selection approaches, with a special focus on prior knowledge approaches. Comprior is extensible by custom approaches, offers built-in standard feature selection approaches, enables uniform access to multiple knowledge bases, and provides a customizable evaluation infrastructure to compare multiple feature selection approaches regarding their classification performance, robustness, runtime, and biological relevance. Conclusion Comprior allows reproducible benchmarking especially of prior knowledge approaches, which facilitates their applicability and for the first time enables a comprehensive assessment of their effectiveness}, language = {en} } @article{LosterKoumarelasNaumann2021, author = {Loster, Michael and Koumarelas, Ioannis and Naumann, Felix}, title = {Knowledge transfer for entity resolution with siamese neural networks}, series = {ACM journal of data and information quality}, volume = {13}, journal = {ACM journal of data and information quality}, number = {1}, publisher = {Association for Computing Machinery}, address = {New York}, issn = {1936-1955}, doi = {10.1145/3410157}, pages = {25}, year = {2021}, abstract = {The integration of multiple data sources is a common problem in a large variety of applications. Traditionally, handcrafted similarity measures are used to discover, merge, and integrate multiple representations of the same entity-duplicates-into a large homogeneous collection of data. Often, these similarity measures do not cope well with the heterogeneity of the underlying dataset. In addition, domain experts are needed to manually design and configure such measures, which is both time-consuming and requires extensive domain expertise.
We propose a deep Siamese neural network, capable of learning a similarity measure that is tailored to the characteristics of a particular dataset. With the properties of deep learning methods, we are able to eliminate the manual feature engineering process and thus considerably reduce the effort required for model construction. In addition, we show that it is possible to transfer knowledge acquired during the deduplication of one dataset to another, and thus significantly reduce the amount of data required to train a similarity measure. We evaluated our method on multiple datasets and compare our approach to state-of-the-art deduplication methods. Our approach outperforms competitors by up to +26 percent F-measure, depending on task and dataset. In addition, we show that knowledge transfer is not only feasible, but in our experiments led to an improvement in F-measure of up to +4.7 percent.}, language = {en} } @phdthesis{Baier2015, author = {Baier, Thomas}, title = {Matching events and activities}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-84548}, school = {Universit{\"a}t Potsdam}, pages = {xxii, 213}, year = {2015}, abstract = {Nowadays, business processes are increasingly supported by IT services that produce massive amounts of event data during process execution. Aiming at a better process understanding and improvement, this event data can be used to analyze processes using process mining techniques. Process models can be automatically discovered and the execution can be checked for conformance to specified behavior. Moreover, existing process models can be enhanced and annotated with valuable information, for example for performance analysis. While the maturity of process mining algorithms is increasing and more tools are entering the market, process mining projects still face the problem of different levels of abstraction when comparing events with modeled business activities. Mapping the recorded events to activities of a given process model is essential for conformance checking, annotation and understanding of process discovery results. Current approaches try to abstract from events in an automated way that does not capture the required domain knowledge to fit business activities. Such techniques can be a good way to quickly reduce complexity in process discovery. Yet, they fail to enable techniques like conformance checking or model annotation, and potentially create misleading process discovery results by not using the known business terminology. In this thesis, we develop approaches that abstract an event log to the same level that is needed by the business. Typically, this abstraction level is defined by a given process model. Thus, the goal of this thesis is to match events from an event log to activities in a given process model. To accomplish this goal, behavioral and linguistic aspects of process models and event logs as well as domain knowledge captured in existing process documentation are taken into account to build semiautomatic matching approaches. The approaches establish a pre--processing for every available process mining technique that produces or annotates a process model, thereby reducing the manual effort for process analysts. While each of the presented approaches can be used in isolation, we also introduce a general framework for the integration of different matching approaches. The approaches have been evaluated in case studies with industry and using a large industry process model collection and simulated event logs. 
The evaluation demonstrates the effectiveness and efficiency of the approaches and their robustness towards nonconforming execution logs.}, language = {en} } @book{HerbstMaschlerNiephausetal.2015, author = {Herbst, Eva-Maria and Maschler, Fabian and Niephaus, Fabio and Reimann, Max and Steier, Julia and Felgentreff, Tim and Lincke, Jens and Taeumel, Marcel and Hirschfeld, Robert and Witt, Carsten}, title = {ecoControl}, number = {93}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-318-3}, issn = {1613-5652}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-72147}, publisher = {Universit{\"a}t Potsdam}, pages = {viii, 142}, year = {2015}, abstract = {Eine dezentrale Energieversorgung ist ein erster Schritt in Richtung Energiewende. Dabei werden auch in Mehrfamilienh{\"a}usern vermehrt verschiedene Strom- und W{\"a}rmeerzeuger eingesetzt. Besonders in Deutschland kommen in diesem Zusammenhang Blockheizkraftwerke immer h{\"a}ufiger zum Einsatz, weil sie Gas sehr effizient in Strom und W{\"a}rme umwandeln k{\"o}nnen. Außerdem erm{\"o}glichen sie, im Zusammenspiel mit anderen Energiesystemen wie beispielsweise Photovoltaik-Anlagen, eine kontinuierliche und dezentrale Energieversorgung. Bei dem Betrieb von unterschiedlichen Energiesystemen ist es w{\"u}nschenswert, dass die Systeme aufeinander abgestimmt arbeiten. Allerdings ist es bisher schwierig, heterogene Energiesysteme effizient miteinander zu betreiben. Dadurch bleiben Einsparungspotentiale ungenutzt. Eine zentrale Steuerung kann deshalb die Effizienz des Gesamtsystems verbessern. Mit ecoControl stellen wir einen erweiterbaren Prototypen vor, der die Kooperation von Energiesystemen optimiert und Umweltfaktoren miteinbezieht. Dazu stellt die Software eine einheitliche Bedienungsoberfl{\"a}che zur Konfiguration aller Systeme zur Verf{\"u}gung. Außerdem bietet sie die M{\"o}glichkeit, Optimierungsalgorithmen mit Hilfe einer Programmierschnittstelle zu entwickeln, zu testen und auszuf{\"u}hren. Innerhalb solcher Algorithmen k{\"o}nnen von ecoControl bereitgestellte Vorhersagen genutzt werden. Diese Vorhersagen basieren auf dem individuellen Verhalten von jedem Energiesystem, Wettervorhersagen und auf Prognosen des Energieverbrauchs. Mithilfe einer Simulation k{\"o}nnen Techniker unterschiedliche Konfigurationen und Optimierungen sofort ausprobieren, ohne diese {\"u}ber einen langen Zeitraum an realen Ger{\"a}ten testen zu m{\"u}ssen. ecoControl hilft dar{\"u}ber hinaus auch Hausverwaltungen und Vermietern bei der Verwaltung und Analyse der Energiekosten. Wir haben anhand von Fallbeispielen gezeigt, dass Optimierungsalgorithmen, welche die Nutzung von W{\"a}rmespeichern verbessern, die Effizienz des Gesamtsystems erheblich verbessern k{\"o}nnen. Schließlich kommen wir zu dem Schluss, dass ecoControl in einem n{\"a}chsten Schritt unter echten Bedingungen getestet werden muss, sobald eine geeignete Hardwarekomponente verf{\"u}gbar ist. 
{\"U}ber diese Schnittstelle werden die Messwerte an ecoControl gesendet und Steuersignale an die Ger{\"a}te weitergeleitet.}, language = {de} } @article{DittmarBuchholzKuehn2016, author = {Dittmar, Anke and Buchholz, Gregor and K{\"u}hn, Mathias}, title = {Eine Studie zum kollaborativen Modellieren in der Softwaretechnik-Ausbildung}, series = {Commentarii informaticae didacticae (CID)}, journal = {Commentarii informaticae didacticae (CID)}, number = {10}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-376-3}, issn = {1868-0844}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-94806}, pages = {41 -- 53}, year = {2016}, abstract = {Die Vermittlung von Modellierungsf{\"a}higkeiten in der Softwaretechnik-Ausbildung konzentriert sich meist auf Modellierungskonzepte, Notationen und Entwicklungswerkzeuge. Die Betrachtung der Modellierungsaktivit{\"a}ten, etwa die Entwicklung und Gegen{\"u}berstellung alternativer Modellvorschl{\"a}ge, steht weniger im Vordergrund. Die vorliegende Studie untersucht zwei Formen des kollaborativen Modellierens am Tabletop in Bezug auf ihren Einfluss auf die Modellierungsaktivit{\"a}ten in kleinen Gruppen. Die Ergebnisse zeigen, dass sowohl selbstorganisierte als auch moderierte Modellierungssitzungen das Entwickeln eines gemeinsamen Modellverst{\"a}ndnisses f{\"o}rdern. In moderierten Sitzungen wurden zudem mehr alternative L{\"o}sungsideen entwickelt und in st{\"a}rkerem Maße diskutiert.}, language = {de} } @phdthesis{AlSaffar2016, author = {Al-Saffar, Loay Talib Ahmed}, title = {Analysing prerequisites, expectations, apprehensions, and attitudes of university students studying Computer science}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-98437}, school = {Universit{\"a}t Potsdam}, pages = {xii, 131}, year = {2016}, abstract = {The main objective of this dissertation is to analyse prerequisites, expectations, apprehensions, and attitudes of students studying computer science, who are willing to gain a bachelor degree. The research will also investigate in the students' learning style according to the Felder-Silverman model. These investigations fall in the attempt to make an impact on reducing the "dropout"/shrinkage rate among students, and to suggest a better learning environment. The first investigation starts with a survey that has been made at the computer science department at the University of Baghdad to investigate the attitudes of computer science students in an environment dominated by women, showing the differences in attitudes between male and female students in different study years. Students are accepted to university studies via a centrally controlled admission procedure depending mainly on their final score at school. This leads to a high percentage of students studying subjects they do not want. Our analysis shows that 75\% of the female students do not regret studying computer science although it was not their first choice. And according to statistics over previous years, women manage to succeed in their study and often graduate on top of their class. We finish with a comparison of attitudes between the freshman students of two different cultures and two different university enrolment procedures (University of Baghdad, in Iraq, and the University of Potsdam, in Germany) both with opposite gender majority. 
The second step of investigation took place at the department of computer science at the University of Potsdam in Germany and analyzes the learning styles of students studying the three major fields of study offered by the department (computer science, business informatics, and computer science teaching). Investigating the differences in learning styles between the students of those study fields who usually take some joint courses is important to be aware of which changes are necessary to be adopted in the teaching methods to address those different students. It was a two stage study using two questionnaires; the main one is based on the Index of Learning Styles Questionnaire of B. A. Solomon and R. M. Felder, and the second questionnaire was an investigation on the students' attitudes towards the findings of their personal first questionnaire. Our analysis shows differences in the preferences of learning style between male and female students of the different study fields, as well as differences between students with the different specialties (computer science, business informatics, and computer science teaching). The third investigation looks closely into the difficulties, issues, apprehensions and expectations of freshman students studying computer science. The study took place at the computer science department at the University of Potsdam with a volunteer sample of students. The goal is to determine and discuss the difficulties and issues that they are facing in their study that may lead them to think in dropping-out, changing the study field, or changing the university. The research continued with the same sample of students (with business informatics students being the majority) through more than three semesters. Difficulties and issues during the study were documented, as well as students' attitudes, apprehensions, and expectations. Some of the professors and lecturers opinions and solutions to some students' problems were also documented. Many participants had apprehensions and difficulties, especially towards informatics subjects. Some business informatics participants began to think of changing the university, in particular when they reached their third semester, others thought about changing their field of study. Till the end of this research, most of the participants continued in their studies (the study they have started with or the new study they have changed to) without leaving the higher education system.}, language = {en} } @book{NeuhausPolzeChowdhuryy2011, author = {Neuhaus, Christian and Polze, Andreas and Chowdhuryy, Mohammad M. R.}, title = {Survey on healthcare IT systems : standards, regulations and security}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-128-8}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-51463}, publisher = {Universit{\"a}t Potsdam}, pages = {53}, year = {2011}, abstract = {IT systems for healthcare are a complex and exciting field. One the one hand, there is a vast number of improvements and work alleviations that computers can bring to everyday healthcare. Some ways of treatment, diagnoses and organisational tasks were even made possible by computer usage in the first place. On the other hand, there are many factors that encumber computer usage and make development of IT systems for healthcare a challenging, sometimes even frustrating task. These factors are not solely technology-related, but just as well social or economical conditions. 
This report describes some of the idiosyncrasies of IT systems in the healthcare domain, with a special focus on legal regulations, standards and security.}, language = {en} } @book{AbedjanNaumann2011, author = {Abedjan, Ziawasch and Naumann, Felix}, title = {Advancing the discovery of unique column combinations}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-148-6}, issn = {1613-5652}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-53564}, publisher = {Universit{\"a}t Potsdam}, pages = {25}, year = {2011}, abstract = {Unique column combinations of a relational database table are sets of columns that contain only unique values. Discovering such combinations is a fundamental research problem and has many different data management and knowledge discovery applications. Existing discovery algorithms are either brute force or have a high memory load and can thus be applied only to small datasets or samples. In this paper, the well-known GORDIAN algorithm and "Apriori-based" algorithms are compared and analyzed for further optimization. We greatly improve the Apriori algorithms through efficient candidate generation and statistics-based pruning methods. A hybrid solution, HCAGORDIAN, combines the advantages of GORDIAN and our new algorithm HCA, and it significantly outperforms all previous work in many situations.}, language = {en} } @article{SchlierkampThurner2015, author = {Schlierkamp, Kathrin and Thurner, Veronika}, title = {Was will ich eigentlich hier?}, series = {HDI 2014 : Gestalten von {\"U}berg{\"a}ngen}, volume = {2015}, journal = {HDI 2014 : Gestalten von {\"U}berg{\"a}ngen}, number = {9}, editor = {Schubert, Sigrid and Schwill, Andreas}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-84748}, pages = {179 -- 187}, year = {2015}, abstract = {Die Wahl des richtigen Studienfaches und die daran anschließende Studieneingangsphase sind oft entscheidend f{\"u}r den erfolgreichen Verlauf eines Studiums. Eine große Herausforderung besteht dabei darin, bereits in den ersten Wochen des Studiums bestehende Defizite in vermeintlich einfachen Schl{\"u}sselkompetenzen zu erkennen und diese so bald wie m{\"o}glich zu beheben. Eine zweite, nicht minder wichtige Herausforderung ist es, m{\"o}glichst fr{\"u}hzeitig f{\"u}r jeden einzelnen Studierenden zu erkennen, ob er bzw. sie das individuell richtige Studienfach gew{\"a}hlt hat, das den jeweiligen pers{\"o}nlichen Neigungen, Interessen und F{\"a}higkeiten entspricht und zur Verwirklichung der eigenen Lebensziele beitr{\"a}gt. Denn nur dann sind Studierende ausreichend stark und dauerhaft intrinsisch motiviert, um ein anspruchsvolles, komplexes Studium erfolgreich durchzuziehen.
In diesem Beitrag fokussieren wir eine Maßnahme, die die Studierenden an einen Prozess zur systematischen Reflexion des eigenen Lernprozesses und der eigenen Ziele heranf{\"u}hrt und beides in Relation setzt.}, language = {de} } @article{VossebergCzernikErbetal.2015, author = {Vosseberg, Karin and Czernik, Sofie and Erb, Ulrike and Vielhaber, Michael}, title = {Projektorientierte Studieneingangsphase}, series = {HDI 2014 : Gestalten von {\"U}berg{\"a}ngen}, volume = {2015}, journal = {HDI 2014 : Gestalten von {\"U}berg{\"a}ngen}, number = {9}, editor = {Schubert, Sigrid and Schwill, Andreas}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-84730}, pages = {169 -- 177}, year = {2015}, abstract = {Ziel einer neuen Studieneingangsphase ist, den Studierenden bis zum Ende des ersten Semesters ein vielf{\"a}ltiges Berufsbild der Informatik und Wirtschaftsinformatik mit dem breiten Aufgabenspektrum aufzubl{\"a}ttern und damit die Zusammenh{\"a}nge zwischen den einzelnen Modulen des Curriculums zu verdeutlichen. Die Studierenden sollen in die Lage versetzt werden, sehr eigenst{\"a}ndig die Planung und Gestaltung ihres Studiums in die Hand zu nehmen.}, language = {de} } @article{Broeker2015, author = {Br{\"o}ker, Kathrin}, title = {Unterst{\"u}tzung Informatik-Studierender durch ein Lernzentrum}, series = {HDI 2014 : Gestalten von {\"U}berg{\"a}ngen}, volume = {2015}, journal = {HDI 2014 : Gestalten von {\"U}berg{\"a}ngen}, number = {9}, editor = {Schubert, Sigrid and Schwill, Andreas}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-84754}, pages = {189 -- 197}, year = {2015}, abstract = {In diesem Papier wird das Konzept eines Lernzentrums f{\"u}r die Informatik (LZI) an der Universit{\"a}t Paderborn vorgestellt. Ausgehend von den fachspezifischen Schwierigkeiten der Informatik Studierenden werden die Angebote des LZIs erl{\"a}utert, die sich {\"u}ber die vier Bereiche Individuelle Beratung und Betreuung, „Offener Lernraum", Workshops und Lehrveranstaltungen sowie Forschung erstrecken. Eine erste Evaluation mittels Feedbackb{\"o}gen zeigt, dass das Angebot bei den Studierenden positiv aufgenommen wird. Zuk{\"u}nftig soll das Angebot des LZIs weiter ausgebaut und verbessert werden. Ausgangsbasis dazu sind weitere Studien.}, language = {de} } @phdthesis{Prasse2016, author = {Prasse, Paul}, title = {Pattern recognition for computer security}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-100251}, school = {Universit{\"a}t Potsdam}, pages = {VI, 75}, year = {2016}, abstract = {Computer Security deals with the detection and mitigation of threats to computer networks, data, and computing hardware. This thesis addresses the following two computer security problems: email spam campaign and malware detection. Email spam campaigns can easily be generated using popular dissemination tools by specifying simple grammars that serve as message templates. A grammar is disseminated to nodes of a bot net, the nodes create messages by instantiating the grammar at random. Email spam campaigns can encompass huge data volumes and therefore pose a threat to the stability of the infrastructure of email service providers that have to store them. Malware -software that serves a malicious purpose- is affecting web servers, client computers via active content, and client computers through executable files. Without the help of malware detection systems it would be easy for malware creators to collect sensitive information or to infiltrate computers. 
The detection of threats -such as email-spam messages, phishing messages, or malware- is an adversarial and therefore intrinsically difficult problem. Threats vary greatly and evolve over time. The detection of threats based on manually-designed rules is therefore difficult and requires a constant engineering effort. Machine-learning is a research area that revolves around the analysis of data and the discovery of patterns that describe aspects of the data. Discriminative learning methods extract prediction models from data that are optimized to predict a target attribute as accurately as possible. Machine-learning methods hold the promise of automatically identifying patterns that robustly and accurately detect threats. This thesis focuses on the design and analysis of discriminative learning methods for the two computer-security problems under investigation: email-campaign and malware detection. The first part of this thesis addresses email-campaign detection. We focus on regular expressions as a syntactic framework, because regular expressions are intuitively comprehensible by security engineers and administrators, and they can be applied as a detection mechanism in an extremely efficient manner. In this setting, a prediction model is provided with exemplary messages from an email-spam campaign. The prediction model has to generate a regular expression that reveals the syntactic pattern that underlies the entire campaign, and that a security engineers finds comprehensible and feels confident enough to use the expression to blacklist further messages at the email server. We model this problem as two-stage learning problem with structured input and output spaces which can be solved using standard cutting plane methods. Therefore we develop an appropriate loss function, and derive a decoder for the resulting optimization problem. The second part of this thesis deals with the problem of predicting whether a given JavaScript or PHP file is malicious or benign. Recent malware analysis techniques use static or dynamic features, or both. In fully dynamic analysis, the software or script is executed and observed for malicious behavior in a sandbox environment. By contrast, static analysis is based on features that can be extracted directly from the program file. In order to bypass static detection mechanisms, code obfuscation techniques are used to spread a malicious program file in many different syntactic variants. Deobfuscating the code before applying a static classifier can be subjected to mostly static code analysis and can overcome the problem of obfuscated malicious code, but on the other hand increases the computational costs of malware detection by an order of magnitude. In this thesis we present a cascaded architecture in which a classifier first performs a static analysis of the original code and -based on the outcome of this first classification step- the code may be deobfuscated and classified again. We explore several types of features including token \$n\$-grams, orthogonal sparse bigrams, subroutine-hashings, and syntax-tree features and study the robustness of detection methods and feature types against the evolution of malware over time. The developed tool scans very large file collections quickly and accurately. Each model is evaluated on real-world data and compared to reference methods. 
Our approach of inferring regular expressions to filter emails belonging to an email spam campaign leads to models with a high true-positive rate at a very low false-positive rate that is an order of magnitude lower than that of a commercial content-based filter. Our presented system, REx-SVMshort, is being used by a commercial email service provider and complements content-based and IP-address based filtering. Our cascaded malware detection system is evaluated on a high-quality data set of almost 400,000 conspicuous PHP files and a collection of more than 1,00,000 JavaScript files. From our case study we can conclude that our system can quickly and accurately process large data collections at a low false-positive rate.}, language = {en} } @phdthesis{Abedjan2014, author = {Abedjan, Ziawasch}, title = {Improving RDF data with data mining}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-71334}, school = {Universit{\"a}t Potsdam}, year = {2014}, abstract = {Linked Open Data (LOD) comprises very many and often large public data sets and knowledge bases. Those datasets are mostly presented in the RDF triple structure of subject, predicate, and object, where each triple represents a statement or fact. Unfortunately, the heterogeneity of available open data requires significant integration steps before it can be used in applications. Meta information, such as ontological definitions and exact range definitions of predicates, is desirable and ideally provided by an ontology. However, in the context of LOD, ontologies are often incomplete or simply not available. Thus, it is useful to automatically generate meta information, such as ontological dependencies, range definitions, and topical classifications. Association rule mining, which was originally applied for sales analysis on transactional databases, is a promising and novel technique to explore such data. We designed an adaptation of this technique for mining RDF data and introduce the concept of "mining configurations", which allows us to mine RDF data sets in various ways. Different configurations enable us to identify schema and value dependencies that in combination result in interesting use cases. To this end, we present rule-based approaches for auto-completion, data enrichment, ontology improvement, and query relaxation. Auto-completion remedies the problem of inconsistent ontology usage, providing an editing user with a sorted list of commonly used predicates. A combination of different configurations extends this approach to create completely new facts for a knowledge base. We present two approaches for fact generation, a user-based approach where a user selects the entity to be amended with new facts and a data-driven approach where an algorithm discovers entities that have to be amended with missing facts. As knowledge bases constantly grow and evolve, another approach to improve the usage of RDF data is to improve existing ontologies. Here, we present an association rule based approach to reconcile ontology and data. Interlacing different mining configurations, we infer an algorithm to discover synonymously used predicates. Those predicates can be used to expand query results and to support users during query formulation. We provide a wide range of experiments on real world datasets for each use case.
The experiments and evaluations show the added value of association rule mining for the integration and usability of RDF data and confirm the appropriateness of our mining configuration methodology.}, language = {en} } @inproceedings{OPUS4-7665, title = {Proceedings of the Second HPI Cloud Symposium "Operating the Cloud" 2014}, number = {94}, editor = {Bosse, Sascha and Elsaid, Mohamed Esam and Feinbube, Frank and M{\"u}ller, Hendrik}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-319-0}, issn = {1613-5652}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-76654}, pages = {vii, 59}, year = {2015}, abstract = {Every year, the Hasso Plattner Institute (HPI) invites guests from industry and academia to a collaborative scientific workshop on the topic "Operating the Cloud". Our goal is to provide a forum for the exchange of knowledge and experience between industry and academia. Hence, HPI's Future SOC Lab is the adequate environment to host this event which is also supported by BITKOM. On the occasion of this workshop we called for submissions of research papers and practitioners' reports. "Operating the Cloud" aims to be a platform for productive discussions of innovative ideas, visions, and upcoming technologies in the field of cloud operation and administration. In this workshop proceedings the results of the second HPI cloud symposium "Operating the Cloud" 2014 are published. We thank the authors for exciting presentations and insights into their current work and research. Moreover, we look forward to more interesting submissions for the upcoming symposium in 2015.}, language = {en} } @phdthesis{Videla2014, author = {Videla, Santiago}, title = {Reasoning on the response of logical signaling networks with answer set programming}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-71890}, school = {Universit{\"a}t Potsdam}, year = {2014}, abstract = {Deciphering the functioning of biological networks is one of the central tasks in systems biology. In particular, signal transduction networks are crucial for the understanding of the cellular response to external and internal perturbations. Importantly, in order to cope with the complexity of these networks, mathematical and computational modeling is required. We propose a computational modeling framework in order to achieve more robust discoveries in the context of logical signaling networks. More precisely, we focus on modeling the response of logical signaling networks by means of automated reasoning using Answer Set Programming (ASP). ASP provides a declarative language for modeling various knowledge representation and reasoning problems. Moreover, available ASP solvers provide several reasoning modes for assessing the multitude of answer sets. Therefore, leveraging its rich modeling language and its highly efficient solving capacities, we use ASP to address three challenging problems in the context of logical signaling networks: learning of (Boolean) logical networks, experimental design, and identification of intervention strategies. Overall, the contribution of this thesis is three-fold. Firstly, we introduce a mathematical framework for characterizing and reasoning on the response of logical signaling networks. Secondly, we contribute to a growing list of successful applications of ASP in systems biology. 
Thirdly, we present a software providing a complete pipeline for automated reasoning on the response of logical signaling networks.}, language = {en} } @book{MeyerWeske2014, author = {Meyer, Andreas and Weske, Mathias}, title = {Weak conformance between process models and synchronized object life cycles}, number = {91}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-303-9}, issn = {1613-5652}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-71722}, publisher = {Universit{\"a}t Potsdam}, pages = {31}, year = {2014}, abstract = {Process models specify behavioral execution constraints between activities as well as between activities and data objects. A data object is characterized by its states and state transitions represented as object life cycle. For process execution, all behavioral execution constraints must be correct. Correctness can be verified via soundness checking which currently only considers control flow information. For data correctness, conformance between a process model and its object life cycles is checked. Current approaches abstract from dependencies between multiple data objects and require fully specified process models although, in real-world process repositories, often underspecified models are found. Coping with these issues, we introduce the concept of synchronized object life cycles and we define a mapping of data constraints of a process model to Petri nets extending an existing mapping. Further, we apply the notion of weak conformance to process models to tell whether each time an activity needs to access a data object in a particular state, it is guaranteed that the data object is in or can reach the expected state. Then, we introduce an algorithm for an integrated verification of control flow correctness and weak data conformance using soundness checking.}, language = {en} } @article{WesselsMetzger2015, author = {Weßels, Doris and Metzger, Christiane}, title = {Die Arbeitswelt im Fokus}, series = {HDI 2014 : Gestalten von {\"U}berg{\"a}ngen}, volume = {2015}, journal = {HDI 2014 : Gestalten von {\"U}berg{\"a}ngen}, number = {9}, editor = {Schwill, Andreas and Schubert, Sigrid}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-80289}, pages = {77 -- 92}, year = {2015}, abstract = {F{\"u}r Bachelor-Studierende der Wirtschaftsinformatik im zweiten Semester an der Fachhochschule Kiel werden im Modul Informationsmanagement neben klassischen didaktischen Ans{\"a}tzen in einer seminaristischen Unterrichtsform so genannte „Aktivbausteine" eingesetzt: Studierende erhalten zum einen die Gelegenheit, sich im Kontakt mit Fach- und F{\"u}hrungskr{\"a}ften aus der Industrie ein konkretes Bild vom Beruf der Wirtschaftsinformatikerin bzw. des Wirtschaftsinformatikers zu machen; zum anderen erarbeiten sie innovative Ans{\"a}tze der Prozessverbesserung aus Sicht der IT oder mit Nutzenpotenzial f{\"u}r die IT und pr{\"a}sentieren ihre Ergebnisse {\"o}ffentlich im Rahmen des Kieler Prozessmanagementforums. Diese Aktivbausteine dienen insbesondere der Berufsfeldorientierung: Durch die Informationen, die die Studierenden {\"u}ber die Anforderungen und T{\"a}tigkeiten von im Beruf stehenden Menschen erhalten, werden sie in die Lage versetzt, fundierte Entscheidungen bzgl. ihrer Studiengestaltung und Berufswahl zu treffen. Im Beitrag wird die Konzeption der Bausteine vorgestellt und deren Grad der Zielerreichung durch aktuelle Evaluationsergebnisse erl{\"a}utert. 
Zudem wird die motivationale Wirkung der Aktivbausteine anhand der Theorie der Selbstbestimmung von Deci und Ryan [DR1985, DR1993, DR2004] erl{\"a}utert.}, language = {de} } @phdthesis{Haider2013, author = {Haider, Peter}, title = {Prediction with Mixture Models}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-69617}, school = {Universit{\"a}t Potsdam}, year = {2013}, abstract = {Learning a model for the relationship between the attributes and the annotated labels of data examples serves two purposes. Firstly, it enables the prediction of the label for examples without annotation. Secondly, the parameters of the model can provide useful insights into the structure of the data. If the data has an inherent partitioned structure, it is natural to mirror this structure in the model. Such mixture models predict by combining the individual predictions generated by the mixture components which correspond to the partitions in the data. Often the partitioned structure is latent, and has to be inferred when learning the mixture model. Directly evaluating the accuracy of the inferred partition structure is, in many cases, impossible because the ground truth cannot be obtained for comparison. However it can be assessed indirectly by measuring the prediction accuracy of the mixture model that arises from it. This thesis addresses the interplay between the improvement of predictive accuracy by uncovering latent cluster structure in data, and further addresses the validation of the estimated structure by measuring the accuracy of the resulting predictive model. In the application of filtering unsolicited emails, the emails in the training set are latently clustered into advertisement campaigns. Uncovering this latent structure allows filtering of future emails with very low false positive rates. In order to model the cluster structure, a Bayesian clustering model for dependent binary features is developed in this thesis. Knowing the clustering of emails into campaigns can also aid in uncovering which emails have been sent on behalf of the same network of captured hosts, so-called botnets. This association of emails to networks is another layer of latent clustering. Uncovering this latent structure allows service providers to further increase the accuracy of email filtering and to effectively defend against distributed denial-of-service attacks. To this end, a discriminative clustering model is derived in this thesis that is based on the graph of observed emails. The partitionings inferred using this model are evaluated through their capacity to predict the campaigns of new emails. Furthermore, when classifying the content of emails, statistical information about the sending server can be valuable. Learning a model that is able to make use of it requires training data that includes server statistics. In order to also use training data where the server statistics are missing, a model that is a mixture over potentially all substitutions thereof is developed. Another application is to predict the navigation behavior of the users of a website. Here, there is no a priori partitioning of the users into clusters, but to understand different usage scenarios and design different layouts for them, imposing a partitioning is necessary. The presented approach simultaneously optimizes the discriminative as well as the predictive power of the clusters. Each model is evaluated on real-world data and compared to baseline methods. 
The results show that explicitly modeling the assumptions about the latent cluster structure leads to improved predictions compared to the baselines. It is beneficial to incorporate a small number of hyperparameters that can be tuned to yield the best predictions in cases where the prediction accuracy cannot be optimized directly.}, language = {en} } @book{FelgentreffBorningHirschfeld2013, author = {Felgentreff, Tim and Borning, Alan and Hirschfeld, Robert}, title = {Babelsberg : specifying and solving constraints on object behavior}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-265-0}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-67296}, publisher = {Universit{\"a}t Potsdam}, pages = {53}, year = {2013}, abstract = {Constraints allow developers to specify desired properties of systems in a number of domains, and have those properties be maintained automatically. This results in compact, declarative code, avoiding scattered code to check and imperatively re-satisfy invariants. Despite these advantages, constraint programming is not yet widespread, with standard imperative programming still the norm. There is a long history of research on integrating constraint programming with the imperative paradigm. However, this integration typically does not unify the constructs for encapsulation and abstraction from both paradigms. This impedes re-use of modules, as client code written in one paradigm can only use modules written to support that paradigm. Modules require redundant definitions if they are to be used in both paradigms. We present a language - Babelsberg - that unifies the constructs for encapsulation and abstraction by using only object-oriented method definitions for both declarative and imperative code. Our prototype - Babelsberg/R - is an extension to Ruby, and continues to support Ruby's object-oriented semantics. It allows programmers to add constraints to existing Ruby programs in incremental steps by placing them on the results of normal object-oriented message sends. It is implemented by modifying a state-of-the-art Ruby virtual machine. The performance of standard object-oriented code without constraints is only modestly impacted, with typically less than 10\% overhead compared with the unmodified virtual machine. Furthermore, our architecture for adding multiple constraint solvers allows Babelsberg to deal with constraints in a variety of domains. We argue that our approach provides a useful step toward making constraint solving a generic tool for object-oriented programmers. We also provide example applications, written in our Ruby-based implementation, which use constraints in a variety of application domains, including interactive graphics, circuit simulations, data streaming with both hard and soft constraints on performance, and configuration file management.}, language = {en} } @book{PufahlMeyerWeske2013, author = {Pufahl, Luise and Meyer, Andreas and Weske, Mathias}, title = {Batch regions : process instance synchronization based on data}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-280-3}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-69081}, publisher = {Universit{\"a}t Potsdam}, pages = {18}, year = {2013}, abstract = {Business process automation improves organizations' efficiency to perform work. In existing business process management systems, process instances run independently from each other.
However, synchronizing instances carrying similar characteristics, i.e., sharing the same data, can reduce process execution costs. For example, if an online retailer receives two orders from one customer, there is a chance that they can be packed and shipped together to save shipment costs. In this paper, we use concepts from the database domain and introduce data views to business processes to identify instances which can be synchronized. Based on data views, we introduce the concept of batch regions for a context-aware instance synchronization over a set of connected activities. We also evaluate the concepts introduced in this paper with a case study comparing costs for normal and batch processing.}, language = {en} } @book{GieseBecker2013, author = {Giese, Holger and Becker, Basil}, title = {Modeling and verifying dynamic evolving service-oriented architectures}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-246-9}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-65112}, publisher = {Universit{\"a}t Potsdam}, pages = {97}, year = {2013}, abstract = {The service-oriented architecture supports the dynamic assembly and runtime reconfiguration of complex open IT landscapes by means of runtime binding of service contracts, launching of new components and termination of outdated ones. Furthermore, the evolution of these IT landscapes is not restricted to exchanging components with other ones using the same service contracts, as new service contracts can be added as well. However, current approaches for modeling and verification of service-oriented architectures do not support these important capabilities to their full extent. In this report we present an extension of the current OMG proposal for service modeling with UML - SoaML - which overcomes these limitations. It permits modeling services and their service contracts at different levels of abstraction, provides a formal semantics for all modeling concepts, and enables verifying critical properties. Our compositional and incremental verification approach allows for complex properties including communication parameters and time, and covers, besides the dynamic binding of service contracts and the replacement of components, also the evolution of the systems by means of new service contracts. The modeling as well as verification capabilities of the presented approach are demonstrated by means of a supply chain example and the verification results of a first prototype are shown.}, language = {en} } @book{RoggeSoltiMansvanderAalstetal.2013, author = {Rogge-Solti, Andreas and Mans, Ronny S. and van der Aalst, Wil M. P. and Weske, Mathias}, title = {Repairing event logs using stochastic process models}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-258-2}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-66797}, publisher = {Universit{\"a}t Potsdam}, pages = {19}, year = {2013}, abstract = {Companies strive to improve their business processes in order to remain competitive. Process mining aims to infer meaningful insights from process-related data and has attracted the attention of practitioners, tool-vendors, and researchers in recent years. Traditionally, event logs are assumed to describe the as-is situation. But this is not necessarily the case in environments where logging may be compromised due to manual logging. For example, hospital staff may need to manually enter information regarding the patient's treatment.
As a result, events or timestamps may be missing or incorrect. In this paper, we make use of process knowledge captured in process models, and provide a method to repair missing events in the logs. This way, we facilitate analysis of incomplete logs. We realize the repair by combining stochastic Petri nets, alignments, and Bayesian networks. We evaluate the results using both synthetic data and real event data from a Dutch hospital.}, language = {en} } @book{OPUS4-6813, title = {Cloud security mechanisms}, number = {87}, editor = {Neuhaus, Christian and Polze, Andreas}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-281-0}, issn = {1613-5652}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-68168}, publisher = {Universit{\"a}t Potsdam}, pages = {78}, year = {2014}, abstract = {Cloud computing has brought great benefits in cost and flexibility for provisioning services. The greatest challenge of cloud computing remains however the question of security. The current standard tools in access control mechanisms and cryptography can only partly solve the security challenges of cloud infrastructures. In the recent years of research in security and cryptography, novel mechanisms, protocols and algorithms have emerged that offer new ways to create secure services atop cloud infrastructures. This report provides introductions to a selection of security mechanisms that were part of the "Cloud Security Mechanisms" seminar in summer term 2013 at HPI.}, language = {en} } @phdthesis{Kunze2013, author = {Kunze, Matthias}, title = {Searching business process models by example}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-68844}, school = {Universit{\"a}t Potsdam}, year = {2013}, abstract = {Business processes are fundamental to the operations of a company. Each product manufactured and every service provided is the result of a series of actions that constitute a business process. Business process management is an organizational principle that makes the processes of a company explicit and offers capabilities to implement procedures, control their execution, analyze their performance, and improve them. Therefore, business processes are documented as process models that capture these actions and their execution ordering, and make them accessible to stakeholders. As these models are an essential knowledge asset, they need to be managed effectively. In particular, the discovery and reuse of existing knowledge becomes challenging in the light of companies maintaining hundreds and thousands of process models. In practice, searching process models has been solved only superficially by means of free-text search of process names and their descriptions. Scientific contributions are limited in their scope, as they either present measures for process similarity or elaborate on query languages to search for particular aspects. However, they fall short in addressing efficient search, the presentation of search results, and the support to reuse discovered models. This thesis presents a novel search method, where a query is expressed by an exemplary business process model that describes the behavior of a possible answer. This method builds upon a formal framework that captures and compares the behavior of process models by the execution ordering of actions. The framework contributes a conceptual notion of behavioral distance that quantifies commonalities and differences of a pair of process models, and enables process model search. 
Based on behavioral distances, a set of measures is proposed that evaluate the quality of a particular search result to guide the user in assessing the returned matches. A projection of behavioral aspects to a process model enables highlighting relevant fragments that led to a match and facilitates its reuse. The thesis further elaborates on two search techniques that provide concrete behavioral distance functions as an instantiation of the formal framework. Querying enables search with a notion of behavioral inclusion with regard to the query. In contrast, similarity search obtains process models that are similar to a query, even if the query is not precisely matched. For both techniques, indexes are presented that enable efficient search. Methods to evaluate the quality and performance of process model search are introduced and applied to the techniques of this thesis. They show good results with regard to human assessment and scalability in a practical setting.}, language = {en} } @book{PapeTrefferHirschfeldetal.2013, author = {Pape, Tobias and Treffer, Arian and Hirschfeld, Robert and Haupt, Michael}, title = {Extending a Java Virtual Machine to Dynamic Object-oriented Languages}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-266-7}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-67438}, publisher = {Universit{\"a}t Potsdam}, pages = {163}, year = {2013}, abstract = {There are two common approaches to implement a virtual machine (VM) for a dynamic object-oriented language. On the one hand, it can be implemented in a C-like language for best performance and maximum control over the resulting executable. On the other hand, it can be implemented in a language such as Java that allows for higher-level abstractions. These abstractions, such as proper object-oriented modularization, automatic memory management, or interfaces, are missing in C-like languages but they can simplify the implementation of prevalent but complex concepts in VMs, such as garbage collectors (GCs) or just-in-time compilers (JITs). Yet, the implementation of a dynamic object-oriented language in Java eventually results in two VMs on top of each other (double stack), which impedes performance. For statically typed languages, the Maxine VM solves this problem; it is written in Java but can be executed without a Java virtual machine (JVM). However, it is currently not possible to execute dynamic object-oriented languages in Maxine. This work presents an approach to bringing object models and execution models of dynamic object-oriented languages to the Maxine VM and the application of this approach to Squeak/Smalltalk. The representation of objects in and the execution of dynamic object-oriented languages pose certain challenges to the Maxine VM that lacks certain variation points necessary to enable an effortless and straightforward implementation of dynamic object-oriented languages' execution models. The implementation of Squeak/Smalltalk in Maxine as a feasibility study is to unveil such missing variation points.}, language = {en} } @misc{KaminskiSchaubSiegeletal.2013, author = {Kaminski, Roland and Schaub, Torsten H. 
and Siegel, Anne and Videla, Santiago}, title = {Minimal intervention strategies in logical signaling networks with ASP}, series = {Postprints der Universit{\"a}t Potsdam : Mathematisch Naturwissenschaftliche Reihe}, journal = {Postprints der Universit{\"a}t Potsdam : Mathematisch Naturwissenschaftliche Reihe}, number = {4-5}, issn = {1866-8372}, doi = {10.25932/publishup-41570}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-415704}, pages = {675 -- 690}, year = {2013}, abstract = {Proposing relevant perturbations to biological signaling networks is central to many problems in biology and medicine because it allows for enabling or disabling certain biological outcomes. In contrast to quantitative methods that permit fine-grained (kinetic) analysis, qualitative approaches allow for addressing large-scale networks. This is accomplished by more abstract representations such as logical networks. We elaborate upon such a qualitative approach aiming at the computation of minimal interventions in logical signaling networks relying on Kleene's three-valued logic and fixpoint semantics. We address this problem within answer set programming and show that it greatly outperforms previous work using dedicated algorithms.}, language = {en} } @article{KeverpuetzKueppers2018, author = {Keverp{\"u}tz, Claudia and K{\"u}ppers, Bastian}, title = {Konsistente Lehr-Lern-Prozesse in der dualen IT-Ausbildung}, series = {Commentarii informaticae didacticae}, journal = {Commentarii informaticae didacticae}, number = {12}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-416341}, pages = {91 -- 100}, year = {2018}, abstract = {In der dualen IT-Ausbildung als Verbindung von beruflicher und akademischer Qualifikation werden die berufstypischen Werkzeuge, wie z. B. Laptops, ebenso in den Lehr-Lern-Prozessen der akademischen Unterrichtseinheiten eingesetzt. Im Pr{\"u}fungswesen wird oft auf klassische Papierklausuren zur{\"u}ckgegriffen. Unterrichtseinheiten mit hohem Blended-Learning-Anteil ohne E-Pr{\"u}fung werden dabei als „nicht konsistent" wahrgenommen. In diesem Artikel wird eine empirische Studie dargelegt, die untersucht, welche Einfl{\"u}sse aus der pers{\"o}nlichen Lernbiografie bei den Lehrenden in einer dualen IT-Ausbildung dazu f{\"u}hren k{\"o}nnen, die M{\"o}glichkeiten eines E-Assessments als summative Modulpr{\"u}fung anzunehmen oder abzulehnen. Beispielhaft wurden in der dargelegten Studie Interviews mit Dozenten gef{\"u}hrt und diese hinsichtlich der Verbindung zwischen Lernbiografie, Gestaltung der Didaktik der Lehr-Lern-Prozesse, Zufriedenheit und Ver{\"a}nderungsbereitschaft untersucht.}, language = {de} } @phdthesis{Klimke2018, author = {Klimke, Jan}, title = {Web-based provisioning and application of large-scale virtual 3D city models}, doi = {10.25932/publishup-42805}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-428053}, school = {Universit{\"a}t Potsdam}, pages = {xiii, 141}, year = {2018}, abstract = {Virtual 3D city models represent and integrate a variety of spatial data and georeferenced data related to urban areas. With the help of improved remote-sensing technology, official 3D cadastral data, open data or geodata crowdsourcing, the quantity and availability of such data are constantly expanding and its quality is ever improving for many major cities and metropolitan regions. 
There are numerous fields of applications for such data, including city planning and development, environmental analysis and simulation, disaster and risk management, navigation systems, and interactive city maps. The dissemination and the interactive use of virtual 3D city models represent key technical functionality required by nearly all corresponding systems, services, and applications. The size and complexity of virtual 3D city models, their management, their handling, and especially their visualization represent challenging tasks. For example, mobile applications can hardly handle these models due to their massive data volume and data heterogeneity. Therefore, the efficient usage of all computational resources (e.g., storage, processing power, main memory, and graphics hardware, etc.) is a key requirement for software engineering in this field. Common approaches are based on complex clients that require the 3D model data (e.g., 3D meshes and 2D textures) to be transferred to them and that then render those received 3D models. However, these applications have to implement most stages of the visualization pipeline on client side. Thus, as high-quality 3D rendering processes strongly depend on locally available computer graphics resources, software engineering faces the challenge of building robust cross-platform client implementations. Web-based provisioning aims at providing a service-oriented software architecture that consists of tailored functional components for building web-based and mobile applications that manage and visualize virtual 3D city models. This thesis presents corresponding concepts and techniques for web-based provisioning of virtual 3D city models. In particular, it introduces services that allow us to efficiently build applications for virtual 3D city models based on a fine-grained service concept. The thesis covers five main areas: 1. A Service-Based Concept for Image-Based Provisioning of Virtual 3D City Models: It creates a frame for a broad range of services related to the rendering and image-based dissemination of virtual 3D city models. 2. 3D Rendering Service for Virtual 3D City Models: This service provides efficient, high-quality 3D rendering functionality for virtual 3D city models. In particular, it copes with requirements such as standardized data formats, massive model texturing, detailed 3D geometry, access to associated feature data, and non-assumed frame-to-frame coherence for parallel service requests. In addition, it supports thematic and artistic styling based on an expandable graphics effects library. 3. Layered Map Service for Virtual 3D City Models: It generates a map-like representation of virtual 3D city models using an oblique view. It provides high visual quality, fast initial loading times, simple map-based interaction and feature data access. Based on a configurable client framework, mobile and web-based applications for virtual 3D city models can be created easily. 4. Video Service for Virtual 3D City Models: It creates and synthesizes videos from virtual 3D city models. Without requiring client-side 3D rendering capabilities, users can create camera paths by a map-based user interface, configure scene contents, styling, image overlays, text overlays, and their transitions. The service significantly reduces the manual effort typically required to produce such videos. The videos can automatically be updated when the underlying data changes. 5.
Service-Based Camera Interaction: It supports task-based 3D camera interactions, which can be integrated seamlessly into service-based visualization applications. It is demonstrated how to build such web-based interactive applications for virtual 3D city models using this camera service. These contributions provide a framework for design, implementation, and deployment of future web-based applications, systems, and services for virtual 3D city models. The approach shows how to decompose the complex, monolithic functionality of current 3D geovisualization systems into independently designed, implemented, and operated service-oriented units. In that sense, this thesis also contributes to microservice architectures for 3D geovisualization systems—a key challenge of today's IT systems engineering to build scalable IT solutions.}, language = {en} } @phdthesis{Richter2018, author = {Richter, Rico}, title = {Concepts and techniques for processing and rendering of massive 3D point clouds}, doi = {10.25932/publishup-42330}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-423304}, school = {Universit{\"a}t Potsdam}, pages = {v, 131}, year = {2018}, abstract = {Remote sensing technology, such as airborne, mobile, or terrestrial laser scanning, and photogrammetric techniques, are fundamental approaches for efficient, automatic creation of digital representations of spatial environments. For example, they allow us to generate 3D point clouds of landscapes, cities, infrastructure networks, and sites. As an essential and universal category of geodata, 3D point clouds are used and processed by a growing number of applications, services, and systems such as in the domains of urban planning, landscape architecture, environmental monitoring, disaster management, virtual geographic environments as well as for spatial analysis and simulation. While the acquisition processes for 3D point clouds become more and more reliable and widely-used, applications and systems are faced with more and more 3D point cloud data. In addition, 3D point clouds, by their very nature, are raw data, i.e., they do not contain any structural or semantic information. Many processing strategies common to GIS such as deriving polygon-based 3D models generally do not scale for billions of points. GIS typically reduce data density and precision of 3D point clouds to cope with the sheer amount of data, but that results in a significant loss of valuable information at the same time. This thesis proposes concepts and techniques designed to efficiently store and process massive 3D point clouds. To this end, object-class segmentation approaches are presented to attribute semantics to 3D point clouds, used, for example, to identify building, vegetation, and ground structures and, thus, to enable processing, analyzing, and visualizing 3D point clouds in a more effective and efficient way. Similarly, change detection and updating strategies for 3D point clouds are introduced that allow for reducing storage requirements and incrementally updating 3D point cloud databases. In addition, this thesis presents out-of-core, real-time rendering techniques used to interactively explore 3D point clouds and related analysis results. All techniques have been implemented based on specialized spatial data structures, out-of-core algorithms, and GPU-based processing schemas to cope with massive 3D point clouds having billions of points.
All proposed techniques have been evaluated and have demonstrated their applicability to the field of geospatial applications and systems, in particular for tasks such as classification, processing, and visualization. Case studies for 3D point clouds of entire cities with up to 80 billion points show that the presented approaches open up new ways to manage and apply large-scale, dense, and time-variant 3D point clouds as required by a rapidly growing number of applications and systems.}, language = {en} } @misc{GebserSchaubThieleetal.2011, author = {Gebser, Martin and Schaub, Torsten H. and Thiele, Sven and Veber, Philippe}, title = {Detecting inconsistencies in large biological networks with answer set programming}, series = {Postprints der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe}, journal = {Postprints der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe}, number = {561}, issn = {1866-8372}, doi = {10.25932/publishup-41246}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-412467}, pages = {38}, year = {2011}, abstract = {We introduce an approach to detecting inconsistencies in large biological networks by using answer set programming. To this end, we build upon a recently proposed notion of consistency between biochemical/genetic reactions and high-throughput profiles of cell activity. We then present an approach based on answer set programming to check the consistency of large-scale data sets. Moreover, we extend this methodology to provide explanations for inconsistencies by determining minimal representations of conflicts. In practice, this can be used to identify unreliable data or to indicate missing reactions.}, language = {en} } @misc{DurzinskyMarwanOstrowskietal.2011, author = {Durzinsky, Markus and Marwan, Wolfgang and Ostrowski, Max and Schaub, Torsten H. and Wagler, Annegret}, title = {Automatic network reconstruction using ASP}, series = {Postprints der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe}, journal = {Postprints der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe}, number = {560}, issn = {1866-8372}, doi = {10.25932/publishup-41241}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-412419}, pages = {18}, year = {2011}, abstract = {Building biological models by inferring functional dependencies from experimental data is an important issue in Molecular Biology. To relieve the biologist from this traditionally manual process, various approaches have been proposed to increase the degree of automation. However, available approaches often yield a single model only, rely on specific assumptions, and/or use dedicated, heuristic algorithms that are intolerant to changing circumstances or requirements in view of the rapid progress made in Biotechnology. Our aim is to provide a declarative solution to the problem by appeal to Answer Set Programming (ASP), overcoming these difficulties. We build upon an existing approach to Automatic Network Reconstruction proposed by some of the authors. This approach has firm mathematical foundations and is well suited for ASP due to its combinatorial flavor, providing a characterization of all models explaining a set of experiments. The usage of ASP has several benefits over the existing heuristic algorithms. First, it is declarative and thus transparent for biological experts. Second, it is elaboration tolerant and thus allows for an easy exploration and incorporation of biological constraints.
Third, it allows for exploring the entire space of possible models. Finally, our approach offers an excellent performance, matching existing, special-purpose systems.}, language = {en} } @misc{EhrigGolasHabeletal.2014, author = {Ehrig, Hartmut and Golas, Ulrike and Habel, Annegret and Lambers, Leen and Orejas, Fernando}, title = {M-adhesive transformation systems with nested application conditions}, series = {Postprints der Universit{\"a}t Potsdam : Digital Engineering Reihe}, journal = {Postprints der Universit{\"a}t Potsdam : Digital Engineering Reihe}, number = {001}, doi = {10.25932/publishup-41565}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-415651}, pages = {50}, year = {2014}, abstract = {Nested application conditions generalise the well-known negative application conditions and are important for several application domains. In this paper, we present Local Church-Rosser, Parallelism, Concurrency and Amalgamation Theorems for rules with nested application conditions in the framework of M-adhesive categories, where M-adhesive categories are slightly more general than weak adhesive high-level replacement categories. Most of the proofs are based on the corresponding statements for rules without application conditions and two shift lemmas stating that nested application conditions can be shifted over morphisms and rules.}, language = {en} } @phdthesis{Dick2016, author = {Dick, Uwe}, title = {Discriminative Classification Models for Internet Security}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-102593}, school = {Universit{\"a}t Potsdam}, pages = {x, 57}, year = {2016}, abstract = {Services that operate over the Internet are under constant threat of being exposed to fraudulent use. Maintaining good user experience for legitimate users often requires the classification of entities as malicious or legitimate in order to initiate countermeasures. As an example, inbound email spam filters decide for spam or non-spam. They can base their decision on both the content of each email as well as on features that summarize prior emails received from the sending server. In general, discriminative classification methods learn to distinguish positive from negative entities. Each decision for a label may be based on features of the entity and related entities. When labels of related entities have strong interdependencies---as can be assumed e.g. for emails being delivered by the same user---classification decisions should not be made independently and dependencies should be modeled in the decision function. This thesis addresses the formulation of discriminative classification problems that are tailored for the specific demands of the following three Internet security applications. Theoretical and algorithmic solutions are devised to protect an email service against flooding of user inboxes, to mitigate abusive usage of outbound email servers, and to protect web servers against distributed denial of service attacks. In the application of filtering an inbound email stream for unsolicited emails, utilizing features that go beyond each individual email's content can be valuable. Information about each sending mail server can be aggregated over time and may help in identifying unwanted emails. However, while this information will be available to the deployed email filter, some parts of the training data that are compiled by third party providers may not contain this information. The missing features have to be estimated at training time in order to learn a classification model. 
In this thesis an algorithm is derived that learns a decision function that integrates over a distribution of values for each missing entry. The distribution of missing values is a free parameter that is optimized to learn an optimal decision function. The outbound stream of emails of an email service provider can be separated by the customer IDs that ask for delivery. All emails that are sent by the same ID in the same period of time are related, both in content and in label. Hijacked customer accounts may send batches of unsolicited emails to other email providers, which in turn might blacklist the sender's email servers after detection of incoming spam emails. The risk of being blocked from further delivery depends on the rate of outgoing unwanted emails and the duration of high spam sending rates. An optimization problem is developed that minimizes the expected cost for the email provider by learning a decision function that assigns a limit on the sending rate to customers based on each customer's email stream. Identifying attacking IPs during HTTP-level DDoS attacks allows blocking those IPs from further accessing the web servers. DDoS attacks are usually carried out by infected clients that are members of the same botnet and show similar traffic patterns. HTTP-level attacks aim at exhausting one or more resources of the web server infrastructure, such as CPU time. If the joint set of attackers cannot increase resource usage close to the maximum capacity, no effect will be experienced by legitimate users of hosted web sites. However, if the additional load raises the computational burden towards the critical range, user experience will degrade until service may be unavailable altogether. As the loss incurred by missing one attacker depends on block decisions for other attackers---if most other attackers are detected, not blocking one client will likely not be harmful---a structured output model has to be learned. In this thesis an algorithm is developed that learns a structured prediction decoder that searches the space of label assignments, guided by a policy. Each model is evaluated on real-world data and is compared to reference methods. The results show that modeling each classification problem according to the specific demands of the task improves performance over solutions that do not consider the constraints inherent to an application.}, language = {en} } @article{Rolf2010, author = {Rolf, Arno}, title = {Themeng{\"a}rten in der Informatik-Ausbildung}, series = {Commentarii informaticae didacticae : (CID)}, journal = {Commentarii informaticae didacticae : (CID)}, number = {4}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, issn = {1868-0844}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-64281}, pages = {7 -- 12}, year = {2010}, abstract = {Die M{\"o}glichkeiten sich zu informieren, am Leben der vielen Anderen teilzunehmen ist durch das Internet mit seinen Tweets, Google-Angeboten und sozialen Netzwerken wie Facebook ins Unermessliche gewachsen. Zugleich f{\"u}hlen sich viele Nutzer {\"u}berfordert und meinen, im Meer der Informationen zu ertrinken. So bekennt Frank Schirrmacher in seinem Buch Payback, dass er den geistigen Anforderungen unserer Zeit nicht mehr gewachsen ist. Sein Kopf komme nicht mehr mit. Er sei unkonzentriert, vergesslich und st{\"a}ndig abgelenkt. Das, was vielen zum Problem geworden ist, sehen viele Studierende eher pragmatisch.
Der Wissenserwerb in Zeiten von Internet und E-Learning l{\"a}uft an Hochschulen h{\"a}ufig nach der Helene-Hegemann-Methode ab: Zun{\"a}chst machen sich die Studierenden, z.B. im Rahmen einer Studien- oder Hausarbeit, bei Wikipedia „schlau", ein Einstieg ist geschafft. Anschließend wird dieses Wissen mit Google angereichert. Damit ist {\"U}berblickswissen vorhanden. Mit geschickter copy-and-paste-Komposition l{\"a}sst sich daraus schon ein „Werk" erstellen. Der ein oder andere Studierende gibt sich mit diesem Wissenserwerb zufrieden und bricht seinen Lernprozess hier bereits ab. Nun ist zwar am Ende jeder Studierende f{\"u}r seinen Wissenserwerb selbst verantwortlich. Die erkennbar unbefriedigende Situation sollte die Hochschulen aber herausfordern, das Internet in Vorlesungen und Seminaren auszuprobieren und sinnvolle Anwendungen zu entwickeln. Beispiele gibt es durchaus. Unter der Metapher E-Learning hat sich ein umfangreicher Forschungsschwerpunkt an den Universit{\"a}ten entwickelt. Einige Beispiele von vielen: So hat der Osnabr{\"u}cker Informatik-Professor Oliver Vornberger seine Vorlesungen als Video ins Netz gestellt. Per RSS ist es m{\"o}glich, Sequenzen aufs iPod zu laden. Die {\"u}bliche Dozentenangst, dann w{\"u}rden sie ja vor leeren B{\"a}nken sitzen, scheint unbegr{\"u}ndet. Sie werden von den Studierenden vor allem zur Pr{\"u}fungsvorbereitung genutzt. Wie ist das Internet, das f{\"u}r die junge Generation zu einem alles andere verdr{\"a}ngenden Universalmedium geworden ist, didaktisch in die Hochschullehre einzubinden? Wie also ist konkret mit diesen Herausforderungen umzugehen? Dies soll uns im Folgenden besch{\"a}ftigen.}, language = {de} } @article{MetzgerHaag2013, author = {Metzger, Christiane and Haag, Johann}, title = {„Ich k{\"o}nnte nie wieder zu einem ‚normalen' Stundenplan zur{\"u}ck!"}, series = {Commentarii informaticae didacticae : (CID)}, journal = {Commentarii informaticae didacticae : (CID)}, number = {5}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, issn = {1868-0844}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-64884}, pages = {67 -- 78}, year = {2013}, abstract = {Im Bachelor-Studiengang (B. Sc.) IT Security an der Fachhochschule St. P{\"o}lten wurde im Wintersemester 2011/12 versuchsweise die Lehrorganisation im ersten Fachsemester ver{\"a}ndert: Die Module bzw. Teilmodule wurden nicht mehr alle parallel zueinander unterrichtet, sondern jedes Modul wurde exklusiv {\"u}ber einige Wochen abgehalten. Im Beitrag werden die Auswirkungen und bisherigen Erfahrungen mit dieser Reorganisation der Lehre geschildert: So haben sich die Noten im Mittel um etwa eine Note verbessert, die Zahl derjenigen Studierenden, die durch Pr{\"u}fungen durchfallen, ist drastisch gesunken. 
Die Zufriedenheit der Studierenden und Lehrenden ist so groß, dass diese Form der Lehrorganisation im gesamten Bachelor- und auch im Masterstudiengang {\"u}bernommen wird.}, language = {de} } @article{KlingerPolutinaBibel2013, author = {Klinger, Melanie and Polutina, Olena and Bibel, Ariane}, title = {Studentische eLearning-Beratung}, series = {Commentarii informaticae didacticae : (CID)}, journal = {Commentarii informaticae didacticae : (CID)}, number = {5}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, issn = {1868-0844}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-65003}, pages = {131 -- 136}, year = {2013}, abstract = {Der vorliegende Beitrag besch{\"a}ftigt sich mit der Frage, wie der eLearning-Support in großen Institutionen effizient gestaltet werden kann. Vorgestellt wird ein experimentelles Beratungsprojekt, das Lehrende bei der Gestaltung von eLearning-Maßnahmen mithilfe der Lernplattform ILIAS unterst{\"u}tzt. Neben der Zielsetzung des Projekts werden dessen Aufbau und erste Praxiserfahrungen er{\"o}rtert. Außerdem werden Potenziale des Beratungsformats, die insbesondere mit der individuellen Vor-Ort-Beratung der Lehrenden durch hochschuldidaktisch geschulte Studierende einhergehen, erl{\"a}utert. Abschließend werden Grenzen und Weiterentwicklungsperspektiven des Projekts dargestellt. Am Beispiel der ILIAS-Beratung soll gezeigt werden, dass es sich einer nachhaltigen Organisationsentwicklung als zutr{\"a}glich erweist, Kooperationen verschiedenartiger Organisationseinheiten zu f{\"o}rdern und die entstehenden Synergieeffekte zu nutzen.}, language = {de} } @article{BergesMuehlingHubwieseretal.2013, author = {Berges, Marc and M{\"u}hling, Andreas and Hubwieser, Peter and Steuer, Horst}, title = {Informatik f{\"u}r Nichtinformatiker}, series = {Commentarii informaticae didacticae : (CID)}, journal = {Commentarii informaticae didacticae : (CID)}, number = {5}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, issn = {1868-0844}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-64962}, pages = {105 -- 110}, year = {2013}, abstract = {Wir stellen die Konzeption und erste Ergebnisse einer neuartigen Informatik-Lehrveranstaltung f{\"u}r Studierende der Geod{\"a}sie vor. Das Konzept verbindet drei didaktische Ideen: Kontextorientierung, Peer-Tutoring und Praxisbezug (Course). Die Studierenden sollen dabei in zwei Semestern wichtige Grundlagen der Informatik verstehen und anzuwenden lernen. Durch enge Verzahnung der Aufgaben mit einem f{\"u}r Nichtinformatiker relevanten Kontext, sowie einem sehr hohen Anteil von Selbstt{\"a}tigkeit der Studierenden soll die Motivation f{\"u}r fachfremde Themen gesteigert werden. Die Ergebnisse zeigen, dass die Veranstaltung sehr erfolgreich war.}, language = {de} } @article{EngbringSelke2013, author = {Engbring, Dieter and Selke, Harald}, title = {Informatik und Gesellschaft als Gebiet der Informatik}, series = {Commentarii informaticae didacticae : (CID)}, journal = {Commentarii informaticae didacticae : (CID)}, number = {5}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, issn = {1868-0844}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-64977}, pages = {111 -- 116}, year = {2013}, abstract = {In diesem Beitrag berichten wir {\"u}ber die Erfahrungen einer umgestalteten Lehre im Bereich Informatik und Gesellschaft (IuG). Die Gr{\"u}nde f{\"u}r die Umgestaltung und die Konzeption werden skizziert. Die Erfahrungen haben wir zu Thesen verdichtet: 1.
Informatik und Gesellschaft sollte eine Pflichtveranstaltung im Bachelor-Studium sein, in der Studierende einen {\"U}berblick erhalten, welche gesellschaftlichen Rahmenbedingungen f{\"u}r sie relevant sind und wie man diese in die Praxis mit einbeziehen kann. 2. Historische Inhalte der Informatik sollen hier aufgearbeitet werden, indem man aktuelle Entwicklungen im Kontext ihrer Genese betrachtet.}, language = {de} }