@article{ReinRamsonLinckeetal.2017, author = {Rein, Patrick and Ramson, Stefan and Lincke, Jens and Felgentreff, Tim and Hirschfeld, Robert}, title = {Group-Based Behavior Adaptation Mechanisms in Object-Oriented Systems}, series = {IEEE software}, volume = {34}, journal = {IEEE software}, number = {6}, publisher = {Inst. of Electr. and Electronics Engineers}, address = {Los Alamitos}, issn = {0740-7459}, doi = {10.1109/MS.2017.4121224}, pages = {78 -- 82}, year = {2017}, abstract = {Dynamic and distributed systems require behavior adaptations for groups of objects. Group-based behavior adaptation mechanisms scope adaptations to objects matching conditions beyond class membership. The specification of groups can be explicit or implicit.}, language = {en} } @article{BleifussBornemannJohnsonetal.2018, author = {Bleifuss, Tobias and Bornemann, Leon and Johnson, Theodore and Kalashnikov, Dmitri and Naumann, Felix and Srivastava, Divesh}, title = {Exploring Change}, series = {Proceedings of the VLDB Endowment}, volume = {12}, journal = {Proceedings of the VLDB Endowment}, number = {2}, publisher = {Association for Computing Machinery}, address = {New York}, issn = {2150-8097}, doi = {10.14778/3282495.3282496}, pages = {85 -- 98}, year = {2018}, abstract = {Data and metadata in datasets experience many different kinds of change. Values are inserted, deleted or updated; rows appear and disappear; columns are added or repurposed, etc. In such a dynamic situation, users might have many questions related to changes in the dataset, for instance which parts of the data are trustworthy and which are not? Users will wonder: How many changes have there been in the recent minutes, days or years? What kind of changes were made at which points of time? How dirty is the data? Is data cleansing required?
The fact that data changed can hint at different hidden processes or agendas: a frequently crowd-updated city name may be controversial; a person whose name has been recently changed may be the target of vandalism; and so on. We show various use cases that benefit from recognizing and exploring such change. We envision a system and methods to interactively explore such change, addressing the variability dimension of big data challenges. To this end, we propose a model to capture change and the process of exploring dynamic data to identify salient changes. We provide exploration primitives along with motivational examples and measures for the volatility of data. We identify technical challenges that need to be addressed to make our vision a reality, and propose directions of future work for the data management community.}, language = {en} } @article{MaximovaGieseKrause2018, author = {Maximova, Maria and Giese, Holger and Krause, Christian}, title = {Probabilistic timed graph transformation systems}, series = {Journal of Logical and Algebraic Methods in Programming}, volume = {101}, journal = {Journal of Logical and Algebraic Methods in Programming}, publisher = {Elsevier}, address = {New York}, issn = {2352-2208}, doi = {10.1016/j.jlamp.2018.09.003}, pages = {110 -- 131}, year = {2018}, abstract = {Today, software has become an intrinsic part of complex distributed embedded real-time systems. The next generation of embedded real-time systems will interconnect the today unconnected systems via complex software parts and the service-oriented paradigm. Due to these interconnections, the architecture of systems can be subject to changes at run-time, e.g. when dynamic binding of service end-points is employed or complex collaborations are established dynamically. However, suitable formalisms and techniques that allow for modeling and analysis of timed and probabilistic behavior of such systems as well as of their structure dynamics do not exist so far. 
To fill the identified gap, we propose Probabilistic Timed Graph Transformation Systems (PTGTSs) as a high-level description language that supports all the necessary aspects of structure dynamics, timed behavior, and probabilistic behavior. We introduce the formal model of PTGTSs in this paper as well as present and formally verify a mapping of models with finite state spaces to probabilistic timed automata (PTA) that allows to use the PRISM model checker to analyze PTGTS models with respect to PTCTL properties. (C) 2018 Elsevier Inc. All rights reserved.}, language = {en} } @article{HirschfeldKawarnura2006, author = {Hirschfeld, Robert and Kawamura, Katsuya}, title = {Dynamic service adaptation}, series = {Software : practice \& experience}, volume = {36}, journal = {Software : practice \& experience}, number = {11-12}, publisher = {Wiley}, address = {Chichester}, issn = {0038-0644}, doi = {10.1002/spe.766}, pages = {1115 -- 1131}, year = {2006}, abstract = {Change can be observed in our environment and in the technology we build. While changes in the environment happen continuously and implicitly, our technology has to be kept in sync with the changing world around it. Although we can prepare for some of the changes for most of them we cannot. This is especially true for next-generation mobile communication systems that are expected to support the creation of a ubiquitous society where virtually everything is connected and made available within an organic information network. Resources will frequently join or leave the network, new types of media or new combinations of existing types will be used to interact and cooperate, and services will be tailored to preferences and needs of individual customers to better meet their needs. This paper outlines our research in the area of dynamic service adaptation to provide concepts and technologies allowing for such environments.
Copyright (C) 2006 John Wiley \& Sons, Ltd.}, language = {en} } @article{DyckGieseLambers2019, author = {Dyck, Johannes and Giese, Holger and Lambers, Leen}, title = {Automatic verification of behavior preservation at the transformation level for relational model transformation}, series = {Software and systems modeling}, volume = {18}, journal = {Software and systems modeling}, number = {5}, publisher = {Springer}, address = {Heidelberg}, issn = {1619-1366}, doi = {10.1007/s10270-018-00706-9}, pages = {2937 -- 2972}, year = {2019}, abstract = {The correctness of model transformations is a crucial element for model-driven engineering of high-quality software. In particular, behavior preservation is an important correctness property avoiding the introduction of semantic errors during the model-driven engineering process. Behavior preservation verification techniques show some kind of behavioral equivalence or refinement between source and target model of the transformation. Automatic tool support is available for verifying behavior preservation at the instance level, i.e., for a given source and target model specified by the model transformation. However, until now there is no sound and automatic verification approach available at the transformation level, i.e., for all source and target models. In this article, we extend our results presented in earlier work (Giese and Lambers, in: Ehrig et al (eds) Graph transformations, Springer, Berlin, 2012) and outline a new transformation-level approach for the sound and automatic verification of behavior preservation captured by bisimulation resp. simulation for outplace model transformations specified by triple graph grammars and semantic definitions given by graph transformation rules.
In particular, we first show how behavior preservation can be modeled in a symbolic manner at the transformation level and then describe that transformation-level verification of behavior preservation can be reduced to invariant checking of suitable conditions for graph transformations. We demonstrate that the resulting checking problem can be addressed by our own invariant checker for an example of a transformation between sequence charts and communicating automata.}, language = {en} } @misc{HoelzleBjoerkVisscher2019, author = {H{\"o}lzle, Katharina and Bj{\"o}rk, Jennie and Visscher, Klaasjan}, title = {Editorial}, series = {Creativity and innovation management}, volume = {28}, journal = {Creativity and innovation management}, number = {1}, publisher = {Wiley}, address = {Hoboken}, issn = {0963-1690}, doi = {10.1111/caim.12307}, pages = {3 -- 4}, year = {2019}, abstract = {The new year starts and many of us have right away been burdened with conference datelines, grant proposal datelines, teaching obligations, paper revisions and many other things. While being more or less successful in fulfilling To-Do lists and ticking of urgent (and sometimes even important) things, we often feel that our ability to be truly creative or innovative is rather restrained by this (external pressure). With this, we are not alone. Many studies have shown that stress does influence overall work performance and satisfaction. Furthermore, more and more students and entry-levels look for work-life balance and search for employers that offer a surrounding and organization considering these needs. High-Tech and start-up companies praise themselves for their "Feel-Good managers" or Yoga programs. But is this really helpful? Is there indeed a relationship between stress, adverse work environment and creativity or innovation? What are the supporting factors in a work environment that lets employees be more creative? 
What kind of leadership do we need for innovative behaviour and to what extent can an organization create support structures that reduce the stress we feel? The first issue of Creativity and Innovation Management in 2019 gives some first answers to these questions and hopefully some food for thought. The first paper written by Dirk De Clercq, and Imanol Belausteguigoitia starts with the question which impact work overload has on creative behaviour. The authors look at how employees' perceptions of work overload reduces their creative behaviour. While they find empirical proof for this relationship, they can also show that the effect is weaker with higher levels of passion for work, emotion sharing, and organizational commitment. The buffering effects of emotion sharing and organizational commitment are particularly strong when they are combined with high levels of passion for work. Their findings give first empirical proof that organizations can and should take an active role in helping their employees reducing the effects of adverse work conditions in order to become or stay creative. However, not only work overload is harming creative behaviour, also the fear of losing one's job has detrimental effects on innovative work behaviour. Anahi van Hootegem, Wendy Niesen and Hans de Witte verify that stress and adverse environmental conditions shape our perception of work. Using threat rigidity theory and an empirical study of 394 employees, they show that the threat of job loss impairs employees' innovativeness through increased irritation and decreased concentration. Organizations can help their employees coping better with this insecurity by communicating more openly and providing different support structures. Support often comes from leadership and the support of the supervisor can clearly shape an employee's motivation to show creative behaviour. Wenjing Cai, Evgenia Lysova, Bart A. G. Bossink, Svetlana N. 
Khapova and Weidong Wang report empirical findings from a large-scale survey in China where they find that supervisor support for creativity and job characteristics effectively activate individual psychological capital associated with employee creativity. On a slight different notion, Gisela B{\"a}cklander looks at agile practices in a very well-known High Tech firm. In "Doing Complexity Leadership Theory: How agile coaches at Spotify practice enabling leadership", she researches the role of agile coaches and how they practice enabling leadership, a key balancing force in complexity leadership. She finds that the active involvement of coaches in observing group dynamics, surfacing conflict and facilitating and encouraging constructive dialogue leads to a positive working environment and the well-being of employees. Quotes from the interviews suggest that the flexible structure provided by the coaches may prove a fruitful way to navigate and balance autonomy and alignment in organizations. The fifth paper of Frederik Anseel, Michael Vandamme, Wouter Duyck and Eric Rietzchel goes a little further down this road and researches how groups can be motivated better to select truly creative ideas. We know from former studies that groups often perform rather poorly when it comes to selecting creative ideas for implementation. The authors find in an extensive field experiment that under conditions of high epistemic motivation, proself motivated groups select significantly more creative and original ideas than prosocial groups. They conclude however, that more research is needed to understand better why these differences occur. The prosocial behaviour of groups is also the theme of Karin Moser, Jeremy F. Dawson and Michael A. West's paper on "Antecedents of team innovation in health care teams". They look at team-level motivation and how a prosocial team environment, indicated by the level of helping behaviour and information-sharing, may foster innovation. 
Their results support the hypotheses of both information-sharing and helping behaviour on team innovation. They suggest that both factors may actually act as buffer against constraints in team work, such as large team size or high occupational diversity in cross-functional health care teams, and potentially turn these into resources supporting team innovation rather than acting as barriers. Away from teams and onto designing favourable work environments, the seventh paper of Ferney Osorio, Laurent Dupont, Mauricio Camargo, Pedro Palominos, Jose Ismael Pena and Miguel Alfaro looks into innovation laboratories. Although several studies have tackled the problem of design, development and sustainability of these spaces for innovation, there is still a gap in understanding how the capabilities and performance of these environments are affected by the strategic intentions at the early stages of their design and functioning. The authors analyse and compare eight existing frameworks from literature and propose a new framework for researchers and practitioners aiming to assess or to adapt innovation laboratories. They test their framework in an exploratory study with fifteen laboratories from five different countries and give recommendations for the future design of these laboratories. From design to design thinking goes our last paper from Rama Krishna Reddy Kummitha on "Design Thinking in Social Organisations: Understanding the role of user engagement" where she studies how users persuade social organisations to adopt design thinking. Looking at four social organisations in India during 2008 to 2013, she finds that the designer roles are blurred when social organisations adopt design thinking, while users in the form of interconnecting agencies reduce the gap between designers and communities. The last two articles were developed from papers presented at the 17th International CINet conference organized in Turin in 2016 by Paolo Neirotti and his colleagues. 
In the first article, F{\'a}bio Gama, Johan Frishammar and Vinit Parida focus on ideation and open innovation in small- and medium-sized enterprises. They investigate the relationship between systematic idea generation and performance and the moderating role of market-based partnerships. Based on a survey among manufacturing SMEs, they conclude that higher levels of performance are reached and that collaboration with customers and suppliers pays off most when idea generation is done in a highly systematic way. The second article, by Anna Holmquist, Mats Magnusson and Mona Livholts, resonates the theme of the CINet conference 'Innovation and Tradition; combining the old and the new'. They explore how tradition is used in craft-based design practices to create new meaning. Applying a narrative 'research through design' approach they uncover important design elements, and tensions between them. Please enjoy this first issue of CIM in 2019 and we wish you creativity and innovation without too much stress in the months to come.}, language = {en} } @misc{BjoerkHoelzle2018, author = {Bj{\"o}rk, Jennie and H{\"o}lzle, Katharina}, title = {Editorial}, series = {Creativity and innovation management}, volume = {27}, journal = {Creativity and innovation management}, number = {4}, publisher = {Wiley}, address = {Hoboken}, issn = {0963-1690}, doi = {10.1111/caim.12298}, pages = {373 -- 374}, year = {2018}, abstract = {"Never doubt that a small group of thoughtful, committed citizens can change the world; indeed, it's the only thing that ever has. - Margaret Mead." With the last issue of this year we want to point out directions towards what will come and what challenges and opportunities lie ahead of us. More needed than ever are joint creative efforts to find ways to collaborate and innovate in order to secure the wellbeing of our earth for the next generation to come.
We have found ourselves puzzled that we could assemble a sustainability issue without having a call for papers or a special issue. In fact, many of the submissions we currently receive, deal with sustainable, ecological or novel approaches to management and organizations. As creativity and innovation are undisputable necessary ingredients for reaching the sustainable development goals, empirical proof and research in this area are still in their infancy. While the role of design and design thinking has been highlighted before for solving wicked societal problems, a lot more research is needed which creative and innovative ways organisations and societies can take to find solutions to climate change, poverty, hunger and education. We would therefore like to call to you, our readers and writers to tackle these problems with your research. The first article in this issue addresses one of the above named challenges - the role of innovation for achieving the transition to a low-carbon energy world. In "Innovating for low-carbon energy through hydropower: Enabling a conservation charity's transition to a low-carbon community", the authors John Gallagher, Paul Coughlan, A. Prysor Williams and Aonghus McNabola look at how an eco-design approach has supported a community transition to low-carbon. They highlight the importance of effective management as well as external collaboration and how the key for success lay in fostering an open environment for creativity and idea sharing. The second article addresses another of the grand challenges, the future of mobility and uses a design-driven approach to develop scenarios for mobility in cities. In "Designing radical innovations of meanings for society: envisioning new scenarios for smart mobility", the authors Claudio Dell'Era, Naiara Altuna and Roberto Verganti investigate how new meanings can be designed and proposed to society rather than to individuals in the particular context of smart mobility. 
Through two case studies the authors argue for a multi-level perspective, taking the perspective of the society to solve societal challenges while considering the needs of the individual. The latter is needed because we will not change if our needs are not addressed. Furthermore, the authors find that both, meaning and technology need to be considered to create radical innovation for society. The role of meaning continues in the third article in this issue. The authors Marta Gasparin and William Green show in their article "Reconstructing meaning without redesigning products: The case of the Serie7 chair" how meaning changes over time even though the product remains the same. Through an in-depth retrospective study of the Serie 7 chair the authors investigate the relationship between meaning and the materiality of the object, and show the importance of materiality in constructing product meaning over long periods. Translating this meaning over the course of the innovation process is an important task of management in order to gain buy-in from all involved stakeholders. In the following article "A systematic approach for new technology development by using a biomimicry-based TRIZ contradiction matrix" the authors Byungun Yoon, Chaeguk Lim, Inchae Park and Dooseob Yoon develop a systematic process combining biomimicry and technology-based TRIZ in order to solve technological problems or develop new technologies based on completely new sources or combinations from technology and biology. In the fifth article in this issue "Innovating via Building Absorptive Capacity: Interactive Effects of Top Management Support of Learning, Employee Learning Orientation, and Decentralization Structure" the authors Li-Yun Sun, Chenwei Li and Yuntao Dong examine the effect of learning-related personal and contextual factors on organizational absorptive capability and subsequent innovative performance. 
The authors find positive effects as well as a moderation influence of decentralized organizational decision-making structures. In the sixth article "Creativity within boundaries: social identity and the development of new ideas in franchise systems" the authors Fanny Simon, Catherine Allix-Desfautaux, Nabil Khelil and Anne-Laure Le Nadant address the paradox of balancing novelty and conformity for creativity in a franchise system. This research is one of the first we know to explicitly address creativity and innovation in such a rigid and pre-determined system. Using a social identity perspective, they can show that social control, which may be exerted by manipulating group identity, is an efficient lever to increase both the creation and the diffusion of the idea. Furthermore, they show that franchisees who do not conform to the norm of the group are stigmatized and must face pressure from the group to adapt their behaviors. This has important implications for future research. In the following article "Exploring employee interactions and quality of contributions in intra-organisational innovation platforms" the authors Dimitra Chasanidou, Nj{\aa}l Sivertstol and Jarle Hildrum examine the user interactions in an intra-organisational innovation platform, and also address the influence of user interactions for idea development. The authors find that employees communicate through the innovation platform with different interaction, contribution and collaboration types and propose three types of contribution qualities---passive, efficient and balanced contribution. In the eighth article "Ready for Take-off": How Open Innovation influences startup success" Cristina Marullo, Elena Casprini, Alberto di Minin and Andrea Piccaluga seek to predict new venture success based on factors that can be observed in the pre-startup phase. The authors introduce different variables of founding teams and how these relate to startup success.
Building on large-scale dataset of submitted business plans at UC Berkeley, they can show that teams with high skills diversity and past joint experience are a lot better able to prevent the risk of business failure at entry and to adapt the internal resources to market conditions. Furthermore, it is crucial for the team to integrate many external knowledge sources into their process (openness) in order to be successful. The crucial role of knowledge and how it is communicated and shared is the focal point of Natalya Sergeeva's and Anna Trifilova's article on "The role of storytelling in the innovation process". The authors can show how storytelling has an important role to play when it comes to motivating employees to innovate and promoting innovation success stories inside and outside the organization. The deep human desire to hear and experience stories is also addressed in the last article in this issue "Gamification Approaches to the Early Stage of Innovation" by Rui Patricio, Antonio Moreira and Francesco Zurlo. Using gamification approaches at the early stage of innovation promises to create better team coherence, let employees experience fun and engagement, improve communication and foster knowledge exchange. Using an analytical framework, the authors analyze 15 articles that have looked at gamification in the context of innovation management before. They find that gamification indeed supports firms in becoming better at performing complex innovation tasks and managing innovation challenges.
Furthermore, gamification in innovation creates a space for inspiration, improves creativity and the generation of high potential ideas.}, language = {en} } @article{OrejasPinoNavarroetal.2018, author = {Orejas, Fernando and Pino, Elvira and Navarro, Marisa and Lambers, Leen}, title = {Institutions for navigational logics for graphical structures}, series = {Theoretical computer science}, volume = {741}, journal = {Theoretical computer science}, publisher = {Elsevier}, address = {Amsterdam}, issn = {0304-3975}, doi = {10.1016/j.tcs.2018.02.031}, pages = {19 -- 24}, year = {2018}, abstract = {We show that a Navigational Logic, i.e., a logic to express properties about graphs and about paths in graphs is a semi-exact institution. In this way, we can use a number of operations to structure and modularize our specifications. Moreover, using the properties of our institution, we also show how to structure single formulas, which in our formalism could be quite complex.}, language = {en} } @article{SoechtingTrapp2020, author = {S{\"o}chting, Maximilian and Trapp, Matthias}, title = {Controlling image-stylization techniques using eye tracking}, series = {Science and Technology Publications}, journal = {Science and Technology Publications}, publisher = {Springer}, address = {Berlin}, issn = {2184-4321}, pages = {10}, year = {2020}, abstract = {With the spread of smart phones capable of taking high-resolution photos and the development of high-speed mobile data infrastructure, digital visual media is becoming one of the most important forms of modern communication. With this development, however, also comes a devaluation of images as a media form with the focus becoming the frequency at which visual content is generated instead of the quality of the content. In this work, an interactive system using image-abstraction techniques and an eye tracking sensor is presented, which allows users to experience diverting and dynamic artworks that react to their eye movement. 
The underlying modular architecture enables a variety of different interaction techniques that share common design principles, making the interface as intuitive as possible. The resulting experience allows users to experience a game-like interaction in which they aim for a reward, the artwork, while being held under constraints, e.g., not blinking. The conscious eye movements that are required by some interaction techniques hint an interesting, possible future extension for this work into the field of relaxation exercises and concentration training.}, language = {en} } @article{BertiEquilleHarmouchNaumannetal.2018, author = {Berti-Equille, Laure and Harmouch, Nazar and Naumann, Felix and Novelli, Noel and Saravanan, Thirumuruganathan}, title = {Discovery of genuine functional dependencies from relational data with missing values}, series = {Proceedings of the VLDB Endowment}, volume = {11}, journal = {Proceedings of the VLDB Endowment}, number = {8}, publisher = {Association for Computing Machinery}, address = {New York}, issn = {2150-8097}, doi = {10.14778/3204028.3204032}, pages = {880 -- 892}, year = {2018}, abstract = {Functional dependencies (FDs) play an important role in maintaining data quality. They can be used to enforce data consistency and to guide repairs over a database. In this work, we investigate the problem of missing values and its impact on FD discovery. When using existing FD discovery algorithms, some genuine FDs could not be detected precisely due to missing values or some non-genuine FDs can be discovered even though they are caused by missing values with a certain NULL semantics. We define a notion of genuineness and propose algorithms to compute the genuineness score of a discovered FD. This can be used to identify the genuine FDs among the set of all valid dependencies that hold on the data. We evaluate the quality of our method over various real-world and semi-synthetic datasets with extensive experiments.
The results show that our method performs well for relatively large FD sets and is able to accurately capture genuine FDs.}, language = {en} } @misc{RischKrestel2018, author = {Risch, Julian and Krestel, Ralf}, title = {My Approach = Your Apparatus?}, series = {Libraries}, journal = {Libraries}, publisher = {Association for Computing Machinery}, address = {New York}, isbn = {978-1-4503-5178-2}, issn = {2575-7865}, doi = {10.1145/3197026.3197038}, pages = {283 -- 292}, year = {2018}, abstract = {Comparative text mining extends from genre analysis and political bias detection to the revelation of cultural and geographic differences, through to the search for prior art across patents and scientific papers. These applications use cross-collection topic modeling for the exploration, clustering, and comparison of large sets of documents, such as digital libraries. However, topic modeling on documents from different collections is challenging because of domain-specific vocabulary. We present a cross-collection topic model combined with automatic domain term extraction and phrase segmentation. This model distinguishes collection-specific and collection-independent words based on information entropy and reveals commonalities and differences of multiple text collections. We evaluate our model on patents, scientific papers, newspaper articles, forum posts, and Wikipedia articles. In comparison to state-of-the-art cross-collection topic modeling, our model achieves up to 13\% higher topic coherence, up to 4\% lower perplexity, and up to 31\% higher document classification accuracy. 
More importantly, our approach is the first topic model that ensures disjunct general and specific word distributions, resulting in clear-cut topic representations.}, language = {en} } @phdthesis{Nikaj2019, author = {Nikaj, Adriatik}, title = {Restful choreographies}, doi = {10.25932/publishup-43890}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-438903}, school = {Universit{\"a}t Potsdam}, pages = {xix, 146}, year = {2019}, abstract = {Business process management has become a key instrument to organize work as many companies represent their operations in business process models. Recently, business process choreography diagrams have been introduced as part of the Business Process Model and Notation standard to represent interactions between business processes, run by different partners. When it comes to the interactions between services on the Web, Representational State Transfer (REST) is one of the primary architectural styles employed by web services today. Ideally, the RESTful interactions between participants should implement the interactions defined at the business choreography level. The problem, however, is the conceptual gap between the business process choreography diagrams and RESTful interactions. Choreography diagrams, on the one hand, are modeled from business domain experts with the purpose of capturing, communicating and, ideally, driving the business interactions. RESTful interactions, on the other hand, depend on RESTful interfaces that are designed by web engineers with the purpose of facilitating the interaction between participants on the internet. In most cases however, business domain experts are unaware of the technology behind web service interfaces and web engineers tend to overlook the overall business goals of web services. While there is considerable work on using process models during process implementation, there is little work on using choreography models to implement interactions between business processes. 
This thesis addresses this research gap by raising the following research question: How to close the conceptual gap between business process choreographies and RESTful interactions? This thesis offers several research contributions that jointly answer the research question. The main research contribution is the design of a language that captures RESTful interactions between participants---RESTful choreography modeling language. Formal completeness properties (with respect to REST) are introduced to validate its instances, called RESTful choreographies. A systematic semi-automatic method for deriving RESTful choreographies from business process choreographies is proposed. The method employs natural language processing techniques to translate business interactions into RESTful interactions. The effectiveness of the approach is shown by developing a prototypical tool that evaluates the derivation method over a large number of choreography models. In addition, the thesis proposes solutions towards implementing RESTful choreographies. In particular, two RESTful service specifications are introduced for aiding, respectively, the execution of choreographies' exclusive gateways and the guidance of RESTful interactions.}, language = {en} } @article{VanHoutTachmazidouBackmanetal.2020, author = {Van Hout, Cristopher V. and Tachmazidou, Ioanna and Backman, Joshua D. and Hoffman, Joshua D. and Liu, Daren and Pandey, Ashutosh K. and Gonzaga-Jauregui, Claudia and Khalid, Shareef and Ye, Bin and Banerjee, Nilanjana and Li, Alexander H. and O'Dushlaine, Colm and Marcketta, Anthony and Staples, Jeffrey and Schurmann, Claudia and Hawes, Alicia and Maxwell, Evan and Barnard, Leland and Lopez, Alexander and Penn, John and Habegger, Lukas and Blumenfeld, Andrew L. and Bai, Xiaodong and O'Keeffe, Sean and Yadav, Ashish and Praveen, Kavita and Jones, Marcus and Salerno, William J. and Chung, Wendy K. and Surakka, Ida and Willer, Cristen J. and Hveem, Kristian and Leader, Joseph B. 
and Carey, David J. and Ledbetter, David H. and Cardon, Lon and Yancopoulos, George D. and Economides, Aris and Coppola, Giovanni and Shuldiner, Alan R. and Balasubramanian, Suganthi and Cantor, Michael and Nelson, Matthew R. and Whittaker, John and Reid, Jeffrey G. and Marchini, Jonathan and Overton, John D. and Scott, Robert A. and Abecasis, Goncalo R. and Yerges-Armstrong, Laura M. and Baras, Aris}, title = {Exome sequencing and characterization of 49,960 individuals in the UK Biobank}, series = {Nature : the international weekly journal of science}, volume = {586}, journal = {Nature : the international weekly journal of science}, number = {7831}, publisher = {Macmillan Publishers Limited}, address = {London}, organization = {Regeneron Genetics Ctr}, issn = {0028-0836}, doi = {10.1038/s41586-020-2853-0}, pages = {749 -- 756}, year = {2020}, abstract = {The UK Biobank is a prospective study of 502,543 individuals, combining extensive phenotypic and genotypic data with streamlined access for researchers around the world(1). Here we describe the release of exome-sequence data for the first 49,960 study participants, revealing approximately 4 million coding variants (of which around 98.6\% have a frequency of less than 1\%). The data include 198,269 autosomal predicted loss-of-function (LOF) variants, a more than 14-fold increase compared to the imputed sequence. Nearly all genes (more than 97\%) had at least one carrier with a LOF variant, and most genes (more than 69\%) had at least ten carriers with a LOF variant. We illustrate the power of characterizing LOF variants in this population through association analyses across 1,730 phenotypes. In addition to replicating established associations, we found novel LOF variants with large effects on disease traits, including PIEZO1 on varicose veins, COL6A1 on corneal resistance, MEPE on bone density, and IQGAP2 and GMPR on blood cell traits.
We further demonstrate the value of exome sequencing by surveying the prevalence of pathogenic variants of clinical importance, and show that 2\% of this population has a medically actionable variant. Furthermore, we characterize the penetrance of cancer in carriers of pathogenic BRCA1 and BRCA2 variants. Exome sequences from the first 49,960 participants highlight the promise of genome sequencing in large population-based studies and are now accessible to the scientific community.
Exome sequences from the first 49,960 participants in the UK Biobank highlight the promise of genome sequencing in large population-based studies and are now accessible to the scientific community.}, language = {en} } @misc{SchlosserKossmannBoissier2019, author = {Schlosser, Rainer and Kossmann, Jan and Boissier, Martin}, title = {Efficient Scalable Multi-Attribute Index Selection Using Recursive Strategies}, series = {2019 IEEE 35th International Conference on Data Engineering (ICDE)}, journal = {2019 IEEE 35th International Conference on Data Engineering (ICDE)}, publisher = {IEEE}, address = {New York}, isbn = {978-1-5386-7474-1}, issn = {1084-4627}, doi = {10.1109/ICDE.2019.00113}, pages = {1238 -- 1249}, year = {2019}, abstract = {An efficient selection of indexes is indispensable for database performance. For large problem instances with hundreds of tables, existing approaches are not suitable: They either exhibit prohibitive runtimes or yield far from optimal index configurations by strongly limiting the set of index candidates or not handling index interaction explicitly. We introduce a novel recursive strategy that does not exclude index candidates in advance and effectively accounts for index interaction. Using large real-world workloads, we demonstrate the applicability of our approach. Further, we evaluate our solution end to end with a commercial database system using a reproducible setup. We show that our solutions are near-optimal for small index selection problems. For larger problems, our strategy outperforms state-of-the-art approaches in both scalability and solution quality.}, language = {en} } @article{ChristopherAshwoodBittremieuxDeutschetal.2020, author = {Ashwood, Christopher and Bittremieux, Wout and Deutsch, Eric W. and Doncheva, Nadezhda T. and Dorfer, Viktoria and Gabriels, Ralf and Gorshkov, Vladimir and Gupta, Surya and Jones, Andrew R.
and K{\"a}ll, Lukas and Kopczynski, Dominik and Lane, Lydie and Lautenbacher, Ludwig and Legeay, Marc and Locard-Paulet, Marie and Mesuere, Bart and Sachsenberg, Timo and Salz, Renee and Samaras, Patroklos and Schiebenhoefer, Henning and Schmidt, Tobias and Schw{\"a}mmle, Veit and Soggiu, Alessio and Uszkoreit, Julian and Van Den Bossche, Tim and Van Puyvelde, Bart and Van Strien, Joeri and Verschaffelt, Pieter and Webel, Henry and Willems, Sander and Perez-Riverol, Yasset and Netz, Eugen and Pfeuffer, Julianus}, title = {Proceedings of the EuBIC-MS 2020 Developers' Meeting}, series = {EuPA Open Proteomics}, volume = {24}, journal = {EuPA Open Proteomics}, publisher = {Elsevier}, address = {Amsterdam}, issn = {2212-9685}, doi = {10.1016/j.euprot.2020.11.001}, pages = {1 -- 6}, year = {2020}, abstract = {The 2020 European Bioinformatics Community for Mass Spectrometry (EuBIC-MS) Developers' meeting was held from January 13th to January 17th 2020 in Nyborg, Denmark. Among the participants were scientists as well as developers working in the field of computational mass spectrometry (MS) and proteomics. The 4-day program was split between introductory keynote lectures and parallel hackathon sessions. During the latter, the participants developed bioinformatics tools and resources addressing outstanding needs in the community. The hackathons allowed less experienced participants to learn from more advanced computational MS experts, and to actively contribute to highly relevant research projects. We successfully produced several new tools that will be useful to the proteomics community by improving data analysis as well as facilitating future research.
All keynote recordings are available on https://doi.org/10.5281/zenodo.3890181.}, language = {en} } @article{StojanovicTrappRichteretal.2019, author = {Stojanovic, Vladeta and Trapp, Matthias and Richter, Rico and D{\"o}llner, J{\"u}rgen Roland Friedrich}, title = {Service-oriented semantic enrichment of indoor point clouds using octree-based multiview classification}, series = {Graphical Models}, volume = {105}, journal = {Graphical Models}, publisher = {Elsevier}, address = {San Diego}, issn = {1524-0703}, doi = {10.1016/j.gmod.2019.101039}, pages = {18}, year = {2019}, abstract = {The use of Building Information Modeling (BIM) for Facility Management (FM) in the Operation and Maintenance (O\&M) stages of the building life-cycle is intended to bridge the gap between operations and digital data, but lacks the functionality of assessing the state of the built environment due to non-automated generation of associated semantics. 3D point clouds can be used to capture the physical state of the built environment, but also lack these associated semantics. A prototypical implementation of a service-oriented architecture for classification of indoor point cloud scenes of office environments is presented, using multiview classification. The multiview classification approach is tested using a retrained Convolutional Neural Network (CNN) model - Inception V3. The presented approach for classifying common office furniture objects (chairs, sofas and desks), contained in 3D point cloud scans, is tested and evaluated. The results show that the presented approach can classify common office furniture up to an acceptable degree of accuracy, and is suitable for quick and robust semantics approximation - based on RGB (red, green and blue color channel) cubemap images of the octree partitioned areas of the 3D point cloud scan. Additional methods for web-based 3D visualization, editing and annotation of point clouds are also discussed. 
Using the described approach, captured scans of indoor environments can be semantically enriched using object annotations derived from multiview classification results. Furthermore, the presented approach is suited for semantic enrichment of lower resolution indoor point clouds acquired using commodity mobile devices.}, language = {en} } @misc{FrickeDoellnerAsche2018, author = {Fricke, Andreas and D{\"o}llner, J{\"u}rgen Roland Friedrich and Asche, Hartmut}, title = {Servicification - Trend or Paradigm Shift in Geospatial Data Processing?}, series = {Computational Science and Its Applications - ICCSA 2018, PT III}, volume = {10962}, journal = {Computational Science and Its Applications - ICCSA 2018, PT III}, publisher = {Springer}, address = {Cham}, isbn = {978-3-319-95168-3}, issn = {0302-9743}, doi = {10.1007/978-3-319-95168-3_23}, pages = {339 -- 350}, year = {2018}, abstract = {Currently we are witnessing profound changes in the geospatial domain. Driven by recent ICT developments, such as web services, service-oriented computing or open-source software, an explosion of geodata and geospatial applications or rapidly growing communities of non-specialist users, the crucial issue is the provision and integration of geospatial intelligence in these rapidly changing, heterogeneous developments. This paper introduces the concept of Servicification into geospatial data processing. Its core idea is the provision of expertise through a flexible number of web-based software service modules. Selection and linkage of these services to user profiles, application tasks, data resources, or additional software allow for the compilation of flexible, time-sensitive geospatial data handling processes. Encapsulated in a string of discrete services, the approach presented here aims to provide non-specialist users with geospatial expertise required for the effective, professional solution of a defined application problem.
Providing users with geospatial intelligence in the form of web-based, modular services, is a completely different approach to geospatial data processing. This novel concept puts geospatial intelligence, made available through services encapsulating rule bases and algorithms, in the centre and at the disposal of the users, regardless of their expertise.}, language = {en} } @misc{ReimannKlingbeilPasewaldtetal.2018, author = {Reimann, Max and Klingbeil, Mandy and Pasewaldt, Sebastian and Semmo, Amir and Trapp, Matthias and D{\"o}llner, J{\"u}rgen Roland Friedrich}, title = {MaeSTrO: A Mobile App for Style Transfer Orchestration using Neural Networks}, series = {International Conference on Cyberworlds (CW)}, journal = {International Conference on Cyberworlds (CW)}, editor = {Sourin, A Sourina}, publisher = {IEEE}, address = {New York}, isbn = {978-1-5386-7315-7}, doi = {10.1109/CW.2018.00016}, pages = {9 -- 16}, year = {2018}, abstract = {Mobile expressive rendering gained increasing popularity among users seeking casual creativity by image stylization and supports the development of mobile artists as a new user group. In particular, neural style transfer has advanced as a core technology to emulate characteristics of manifold artistic styles. However, when it comes to creative expression, the technology still faces inherent limitations in providing low-level controls for localized image stylization. This work enhances state-of-the-art neural style transfer techniques by a generalized user interface with interactive tools to facilitate a creative and localized editing process. Thereby, we first propose a problem characterization representing trade-offs between visual quality, run-time performance, and user control. We then present MaeSTrO, a mobile app for orchestration of neural style transfer techniques using iterative, multi-style generative and adaptive neural networks that can be locally controlled by on-screen painting metaphors. 
At this, first user tests indicate different levels of satisfaction for the implemented techniques and interaction design.}, language = {en} } @misc{LimbergerGroplerBuschmannetal.2018, author = {Limberger, Daniel and Gropler, Anne and Buschmann, Stefan and D{\"o}llner, J{\"u}rgen Roland Friedrich and Wasty, Benjamin}, title = {OpenLL}, series = {22nd International Conference Information Visualisation (IV)}, journal = {22nd International Conference Information Visualisation (IV)}, publisher = {IEEE}, address = {New York}, isbn = {978-1-5386-7202-0}, doi = {10.1109/iV.2018.00039}, pages = {175 -- 181}, year = {2018}, abstract = {Today's rendering APIs lack robust functionality and capabilities for dynamic, real-time text rendering and labeling, which represent key requirements for 3D application design in many fields. As a consequence, most rendering systems are barely or not at all equipped with respective capabilities. This paper drafts the unified text rendering and labeling API OpenLL intended to complement common rendering APIs, frameworks, and transmission formats. For it, various uses of static and dynamic placement of labels are showcased and a text interaction technique is presented. Furthermore, API design constraints with respect to state-of-the-art text rendering techniques are discussed. 
This contribution is intended to initiate a community-driven specification of a free and open label library.}, language = {en} } @article{ReimannKlingbeilPasewaldtetal.2019, author = {Reimann, Max and Klingbeil, Mandy and Pasewaldt, Sebastian and Semmo, Amir and Trapp, Matthias and D{\"o}llner, J{\"u}rgen Roland Friedrich}, title = {Locally controllable neural style transfer on mobile devices}, series = {The Visual Computer}, volume = {35}, journal = {The Visual Computer}, number = {11}, publisher = {Springer}, address = {New York}, issn = {0178-2789}, doi = {10.1007/s00371-019-01654-1}, pages = {1531 -- 1547}, year = {2019}, abstract = {Mobile expressive rendering gained increasing popularity among users seeking casual creativity by image stylization and supports the development of mobile artists as a new user group. In particular, neural style transfer has advanced as a core technology to emulate characteristics of manifold artistic styles. However, when it comes to creative expression, the technology still faces inherent limitations in providing low-level controls for localized image stylization. In this work, we first propose a problem characterization of interactive style transfer representing a trade-off between visual quality, run-time performance, and user control. We then present MaeSTrO, a mobile app for orchestration of neural style transfer techniques using iterative, multi-style generative and adaptive neural networks that can be locally controlled by on-screen painting metaphors. At this, we enhance state-of-the-art neural style transfer techniques by mask-based loss terms that can be interactively parameterized by a generalized user interface to facilitate a creative and localized editing process. 
We report on a usability study and an online survey that demonstrate the ability of our app to transfer styles at improved semantic plausibility.}, language = {en} } @article{VollmerTrappSchumannetal.2018, author = {Vollmer, Jan Ole and Trapp, Matthias and Schumann, Heidrun and D{\"o}llner, J{\"u}rgen Roland Friedrich}, title = {Hierarchical spatial aggregation for level-of-detail visualization of 3D thematic data}, series = {ACM transactions on spatial algorithms and systems}, volume = {4}, journal = {ACM transactions on spatial algorithms and systems}, number = {3}, publisher = {Association for Computing Machinery}, address = {New York}, issn = {2374-0353}, doi = {10.1145/3234506}, pages = {23}, year = {2018}, abstract = {Thematic maps are a common tool to visualize semantic data with a spatial reference. Combining thematic data with a geometric representation of their natural reference frame aids the viewer's ability in gaining an overview, as well as perceiving patterns with respect to location; however, as the amount of data for visualization continues to increase, problems such as information overload and visual clutter impede perception, requiring data aggregation and level-of-detail visualization techniques. While existing aggregation techniques for thematic data operate in a 2D reference frame (i.e., map), we present two aggregation techniques for 3D spatial and spatiotemporal data mapped onto virtual city models that hierarchically aggregate thematic data in real time during rendering to support on-the-fly and on-demand level-of-detail generation. An object-based technique performs aggregation based on scene-specific objects and their hierarchy to facilitate per-object analysis, while the scene-based technique aggregates data solely based on spatial locations, thus supporting visual analysis of data with arbitrary reference geometry. 
Both techniques can apply different aggregation functions (mean, minimum, and maximum) for ordinal, interval, and ratio-scaled data and can be easily extended with additional functions. Our implementation utilizes the programmable graphics pipeline and requires suitably encoded data, i.e., textures or vertex attributes. We demonstrate the application of both techniques using real-world datasets, including solar potential analyses and the propagation of pressure waves in a virtual city model.}, language = {en} } @article{ScheibelTrappLimbergeretal.2020, author = {Scheibel, Willy and Trapp, Matthias and Limberger, Daniel and D{\"o}llner, J{\"u}rgen Roland Friedrich}, title = {A taxonomy of treemap visualization techniques}, series = {Science and Technology Publications}, journal = {Science and Technology Publications}, publisher = {Springer}, address = {Berlin}, pages = {8}, year = {2020}, abstract = {A treemap is a visualization that has been specifically designed to facilitate the exploration of tree-structured data and, more general, hierarchically structured data. The family of visualization techniques that use a visual metaphor for parent-child relationships based ``on the property of containment'' (Johnson, 1993) is commonly referred to as treemaps. However, as the number of variations of treemaps grows, it becomes increasingly important to distinguish clearly between techniques and their specific characteristics. This paper proposes to discern between Space-filling Treemap $T_S$, Containment Treemap $T_C$, Implicit Edge Representation Tree $T_{IE}$, and Mapped Tree $T_{MT}$ for classification of hierarchy visualization techniques and highlights their respective properties. This taxonomy is created as a hyponymy, i.e., its classes have an is-a relationship to one another: $T_S \subset T_C \subset T_{IE} \subset T_{MT}$.
With this proposal, we intend to stimulate a discussion on a more unambiguous classification of treemaps and, furthermore, broaden what is understood by the concept of treemap itself.}, language = {en} } @article{KoetzingKrejca2019, author = {K{\"o}tzing, Timo and Krejca, Martin Stefan}, title = {First-hitting times under drift}, series = {Theoretical computer science}, volume = {796}, journal = {Theoretical computer science}, publisher = {Elsevier}, address = {Amsterdam}, issn = {0304-3975}, doi = {10.1016/j.tcs.2019.08.021}, pages = {51 -- 69}, year = {2019}, abstract = {For the last ten years, almost every theoretical result concerning the expected run time of a randomized search heuristic used drift theory, making it the arguably most important tool in this domain. Its success is due to its ease of use and its powerful result: drift theory allows the user to derive bounds on the expected first-hitting time of a random process by bounding expected local changes of the process - the drift. This is usually far easier than bounding the expected first-hitting time directly. Due to the widespread use of drift theory, it is of utmost importance to have the best drift theorems possible. We improve the fundamental additive, multiplicative, and variable drift theorems by stating them in a form as general as possible and providing examples of why the restrictions we keep are still necessary. Our additive drift theorem for upper bounds only requires the process to be lower-bounded, that is, we remove unnecessary restrictions like a finite, discrete, or bounded state space. As corollaries, the same is true for our upper bounds in the case of variable and multiplicative drift. By bounding the step size of the process, we derive new lower-bounding multiplicative and variable drift theorems. 
Last, we also state theorems that are applicable when the process has a drift of 0, by using a drift on the variance of the process.}, language = {en} } @article{FriedrichKoetzingKrejca2019, author = {Friedrich, Tobias and K{\"o}tzing, Timo and Krejca, Martin Stefan}, title = {Unbiasedness of estimation-of-distribution algorithms}, series = {Theoretical computer science}, volume = {785}, journal = {Theoretical computer science}, publisher = {Elsevier}, address = {Amsterdam}, issn = {0304-3975}, doi = {10.1016/j.tcs.2018.11.001}, pages = {46 -- 59}, year = {2019}, abstract = {In the context of black-box optimization, black-box complexity is used for understanding the inherent difficulty of a given optimization problem. Central to our understanding of nature-inspired search heuristics in this context is the notion of unbiasedness. Specialized black-box complexities have been developed in order to better understand the limitations of these heuristics - especially of (population-based) evolutionary algorithms (EAs). In contrast to this, we focus on a model for algorithms explicitly maintaining a probability distribution over the search space: so-called estimation-of-distribution algorithms (EDAs). We consider the recently introduced $n$-Bernoulli-$\lambda$-EDA framework, which subsumes, for example, the commonly known EDAs PBIL, UMDA, $\lambda$-MMAS$_{\mathrm{IB}}$, and cGA. We show that an $n$-Bernoulli-$\lambda$-EDA is unbiased if and only if its probability distribution satisfies a certain invariance property under isometric automorphisms of $[0, 1]^n$. By restricting how an $n$-Bernoulli-$\lambda$-EDA can perform an update, in a way common to many examples, we derive conciser characterizations, which are easy to verify. We demonstrate this by showing that our examples above are all unbiased. (C) 2018 Elsevier B.V.
All rights reserved.}, language = {en} } @misc{KoetzingKrejca2018, author = {K{\"o}tzing, Timo and Krejca, Martin Stefan}, title = {First-Hitting times under additive drift}, series = {Parallel Problem Solving from Nature - PPSN XV, PT II}, volume = {11102}, journal = {Parallel Problem Solving from Nature - PPSN XV, PT II}, publisher = {Springer}, address = {Cham}, isbn = {978-3-319-99259-4}, issn = {0302-9743}, doi = {10.1007/978-3-319-99259-4_8}, pages = {92 -- 104}, year = {2018}, abstract = {For the last ten years, almost every theoretical result concerning the expected run time of a randomized search heuristic used drift theory, making it the arguably most important tool in this domain. Its success is due to its ease of use and its powerful result: drift theory allows the user to derive bounds on the expected first-hitting time of a random process by bounding expected local changes of the process - the drift. This is usually far easier than bounding the expected first-hitting time directly. Due to the widespread use of drift theory, it is of utmost importance to have the best drift theorems possible. We improve the fundamental additive, multiplicative, and variable drift theorems by stating them in a form as general as possible and providing examples of why the restrictions we keep are still necessary. Our additive drift theorem for upper bounds only requires the process to be nonnegative, that is, we remove unnecessary restrictions like a finite, discrete, or bounded search space. 
As corollaries, the same is true for our upper bounds in the case of variable and multiplicative drift.}, language = {en} } @misc{KoetzingKrejca2018a, author = {K{\"o}tzing, Timo and Krejca, Martin Stefan}, title = {First-Hitting times for finite state spaces}, series = {Parallel Problem Solving from Nature - PPSN XV, PT II}, volume = {11102}, journal = {Parallel Problem Solving from Nature - PPSN XV, PT II}, publisher = {Springer}, address = {Cham}, isbn = {978-3-319-99259-4}, issn = {0302-9743}, doi = {10.1007/978-3-319-99259-4_7}, pages = {79 -- 91}, year = {2018}, abstract = {One of the most important aspects of a randomized algorithm is bounding its expected run time on various problems. Formally speaking, this means bounding the expected first-hitting time of a random process. The two arguably most popular tools to do so are the fitness level method and drift theory. The fitness level method considers arbitrary transition probabilities but only allows the process to move toward the goal. On the other hand, drift theory allows the process to move into any direction as long as it moves closer to the goal in expectation; however, this tendency has to be monotone and, thus, the transition probabilities cannot be arbitrary. We provide a result that combines the benefit of these two approaches: our result gives a lower and an upper bound for the expected first-hitting time of a random process over $\{0, \ldots, n\}$ that is allowed to move forward and backward by 1 and can use arbitrary transition probabilities. In case that the transition probabilities are known, our bounds coincide and yield the exact value of the expected first-hitting time.
Further, we also state the stationary distribution as well as the mixing time of a special case of our scenario.}, language = {en} } @phdthesis{Dyck2020, author = {Dyck, Johannes}, title = {Verification of graph transformation systems with k-inductive invariants}, doi = {10.25932/publishup-44274}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-442742}, school = {Universit{\"a}t Potsdam}, pages = {X, 364}, year = {2020}, abstract = {With rising complexity of today's software and hardware systems and the hypothesized increase in autonomous, intelligent, and self-* systems, developing correct systems remains an important challenge. Testing, although an important part of the development and maintenance process, cannot usually establish the definite correctness of a software or hardware system - especially when systems have arbitrarily large or infinite state spaces or an infinite number of initial states. This is where formal verification comes in: given a representation of the system in question in a formal framework, verification approaches and tools can be used to establish the system's adherence to its similarly formalized specification, and to complement testing. One such formal framework is the field of graphs and graph transformation systems. Both are powerful formalisms with well-established foundations and ongoing research that can be used to describe complex hardware or software systems with varying degrees of abstraction. Since their inception in the 1970s, graph transformation systems have continuously evolved; related research spans extensions of expressive power, graph algorithms, and their implementation, application scenarios, or verification approaches, to name just a few topics. This thesis focuses on a verification approach for graph transformation systems called k-inductive invariant checking, which is an extension of previous work on 1-inductive invariant checking.
Instead of exhaustively computing a system's state space, which is a common approach in model checking, 1-inductive invariant checking symbolically analyzes graph transformation rules - i.e. system behavior - in order to draw conclusions with respect to the validity of graph constraints in the system's state space. The approach is based on an inductive argument: if a system's initial state satisfies a graph constraint and if all rules preserve that constraint's validity, we can conclude the constraint's validity in the system's entire state space - without having to compute it. However, inductive invariant checking also comes with a specific drawback: the locality of graph transformation rules leads to a lack of context information during the symbolic analysis of potential rule applications. This thesis argues that this lack of context can be partly addressed by using k-induction instead of 1-induction. A k-inductive invariant is a graph constraint whose validity in a path of k-1 rule applications implies its validity after any subsequent rule application - as opposed to a 1-inductive invariant where only one rule application is taken into account. Considering a path of transformations then accumulates more context of the graph rules' applications. As such, this thesis extends existing research and implementation on 1-inductive invariant checking for graph transformation systems to k-induction. In addition, it proposes a technique to perform the base case of the inductive argument in a symbolic fashion, which allows verification of systems with an infinite set of initial states. Both k-inductive invariant checking and its base case are described in formal terms. Based on that, this thesis formulates theorems and constructions to apply this general verification approach for typed graph transformation systems and nested graph constraints - and to formally prove the approach's correctness. 
Since unrestricted graph constraints may lead to non-termination or impracticably high execution times given a hypothetical implementation, this thesis also presents a restricted verification approach, which limits the form of graph transformation systems and graph constraints. It is formalized, proven correct, and its procedures terminate by construction. This restricted approach has been implemented in an automated tool and has been evaluated with respect to its applicability to test cases, its performance, and its degree of completeness.}, language = {en} } @phdthesis{Harmouch2020, author = {Harmouch, Hazar}, title = {Single-column data profiling}, doi = {10.25932/publishup-47455}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-474554}, school = {Universit{\"a}t Potsdam}, pages = {x, 115}, year = {2020}, abstract = {The research area of data profiling consists of a large set of methods and processes to examine a given dataset and determine metadata about it. Typically, different data profiling tasks address different kinds of metadata, comprising either various statistics about individual columns (Single-column Analysis) or relationships among them (Dependency Discovery). Among the basic statistics about a column are data type, header, the number of unique values (the column's cardinality), maximum and minimum values, the number of null values, and the value distribution. Dependencies involve, for instance, functional dependencies (FDs), inclusion dependencies (INDs), and their approximate versions. Data profiling has a wide range of conventional use cases, namely data exploration, cleansing, and integration. The produced metadata is also useful for database management and schema reverse engineering. Data profiling has also more novel use cases, such as big data analytics. The generated metadata describes the structure of the data at hand, how to import it, what it is about, and how much of it there is. 
Thus, data profiling can be considered as an important preparatory task for many data analysis and mining scenarios to assess which data might be useful and to reveal and understand a new dataset's characteristics. In this thesis, the main focus is on the single-column analysis class of data profiling tasks. We study the impact and the extraction of three of the most important metadata about a column, namely the cardinality, the header, and the number of null values. First, we present a detailed experimental study of twelve cardinality estimation algorithms. We classify the algorithms and analyze their efficiency, scaling far beyond the original experiments and testing theoretical guarantees. Our results highlight their trade-offs and point out the possibility to create a parallel or a distributed version of these algorithms to cope with the growing size of modern datasets. Then, we present a fully automated, multi-phase system to discover human-understandable, representative, and consistent headers for a target table in cases where headers are missing, meaningless, or unrepresentative for the column values. Our evaluation on Wikipedia tables shows that 60\% of the automatically discovered schemata are exact and complete. Considering more schema candidates, top-5 for example, increases this percentage to 72\%. Finally, we formally and experimentally show the ghost and fake FDs phenomenon caused by FD discovery over datasets with missing values. We propose two efficient scores, probabilistic and likelihood-based, for estimating the genuineness of a discovered FD. 
Our extensive set of experiments on real-world and semi-synthetic datasets show the effectiveness and efficiency of these scores.}, language = {en} } @phdthesis{Mandal2019, author = {Mandal, Sankalita}, title = {Event handling in business processes}, doi = {10.25932/publishup-44170}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-441700}, school = {Universit{\"a}t Potsdam}, pages = {xix, 151}, year = {2019}, abstract = {Business process management (BPM) deals with modeling, executing, monitoring, analyzing, and improving business processes. During execution, the process communicates with its environment to get relevant contextual information represented as events. Recent development of big data and the Internet of Things (IoT) enables sources like smart devices and sensors to generate tons of events which can be filtered, grouped, and composed to trigger and drive business processes. The industry standard Business Process Model and Notation (BPMN) provides several event constructs to capture the interaction possibilities between a process and its environment, e.g., to instantiate a process, to abort an ongoing activity in an exceptional situation, to take decisions based on the information carried by the events, as well as to choose among the alternative paths for further process execution. The specifications of such interactions are termed as event handling. However, in a distributed setup, the event sources are most often unaware of the status of process execution and therefore, an event is produced irrespective of the process being ready to consume it. BPMN semantics does not support such scenarios and thus increases the chance of processes getting delayed or getting in a deadlock by missing out on event occurrences which might still be relevant. The work in this thesis reviews the challenges and shortcomings of integrating real-world events into business processes, especially the subscription management. 
The basic integration is achieved with an architecture consisting of a process modeler, a process engine, and an event processing platform. Further, points of subscription and unsubscription along the process execution timeline are defined for different BPMN event constructs. Semantic and temporal dependencies among event subscription, event occurrence, event consumption and event unsubscription are considered. To this end, an event buffer with policies for updating the buffer, retrieving the most suitable event for the current process instance, and reusing the event has been discussed that supports issuing of early subscription. The Petri net mapping of the event handling model provides our approach with a translation of semantics from a business process perspective. Two applications based on this formal foundation are presented to support the significance of different event handling configurations on correct process execution and reachability of a process path. Prototype implementations of the approaches show that realizing flexible event handling is feasible with minor extensions of off-the-shelf process engines and event platforms.}, language = {en} } @phdthesis{Taeumel2020, author = {Taeumel, Marcel}, title = {Data-driven tool construction in exploratory programming environments}, doi = {10.25932/publishup-44428}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-444289}, school = {Universit{\"a}t Potsdam}, pages = {xiv, 299}, year = {2020}, abstract = {This work presents a new design for programming environments that promote the exploration of domain-specific software artifacts and the construction of graphical tools for such program comprehension tasks. In complex software projects, tool building is essential because domain- or task-specific tools can support decision making by representing concerns concisely with low cognitive effort. 
In contrast, generic tools can only support anticipated scenarios, which usually align with programming language concepts or well-known project domains. However, the creation and modification of interactive tools is expensive because the glue that connects data to graphics is hard to find, change, and test. Even if valuable data is available in a common format and even if promising visualizations could be populated, programmers have to invest many resources to make changes in the programming environment. Consequently, only ideas of predictably high value will be implemented. In the non-graphical, command-line world, the situation looks different and inspiring: programmers can easily build their own tools as shell scripts by configuring and combining filter programs to process data. We propose a new perspective on graphical tools and provide a concept to build and modify such tools with a focus on high quality, low effort, and continuous adaptability. That is, (1) we propose an object-oriented, data-driven, declarative scripting language that reduces the amount of and governs the effects of glue code for view-model specifications, and (2) we propose a scalable UI-design language that promotes short feedback loops in an interactive, graphical environment such as Morphic known from Self or Squeak/Smalltalk systems. We implemented our concept as a tool building environment, which we call VIVIDE, on top of Squeak/Smalltalk and Morphic. We replaced existing code browsing and debugging tools to iterate within our solution more quickly. In several case studies with undergraduate and graduate students, we observed that VIVIDE can be applied to many domains such as live language development, source-code versioning, modular code browsing, and multi-language debugging. Then, we designed a controlled experiment to measure the effect on the time to build tools. 
Several pilot runs showed that training is crucial and, presumably, takes days or weeks, which implies a need for further research. As a result, programmers as users can directly work with tangible representations of their software artifacts in the VIVIDE environment. Tool builders can write domain-specific scripts to populate views to approach comprehension tasks from different angles. Our novel perspective on graphical tools can inspire the creation of new trade-offs in modularity for both data providers and view designers.}, language = {en} } @article{Schlosser2016, author = {Schlosser, Rainer}, title = {Stochastic dynamic pricing and advertising in isoelastic oligopoly models}, series = {European Journal of Operational Research}, volume = {259}, journal = {European Journal of Operational Research}, publisher = {Elsevier}, address = {Amsterdam}, issn = {0377-2217}, doi = {10.1016/j.ejor.2016.11.021}, pages = {1144 -- 1155}, year = {2016}, abstract = {In this paper, we analyze stochastic dynamic pricing and advertising differential games in special oligopoly markets with constant price and advertising elasticity. We consider the sale of perishable as well as durable goods and include adoption effects in the demand. Based on a unique stochastic feedback Nash equilibrium, we derive closed-form solution formulas of the value functions and the optimal feedback policies of all competing firms. Efficient simulation techniques are used to evaluate optimally controlled sales processes over time. This way, the evolution of optimal controls as well as the firms' profit distributions are analyzed. Moreover, we are able to compare feedback solutions of the stochastic model with its deterministic counterpart. We show that the market power of the competing firms is exactly the same as in the deterministic version of the model. Further, we discover two fundamental effects that determine the relation between both models. 
First, the volatility in demand results in a decline of expected profits compared to the deterministic model. Second, we find that saturation effects in demand have an opposite character. We show that the second effect can be strong enough to either exactly balance or even overcompensate the first one. As a result we are able to identify cases in which feedback solutions of the deterministic model provide useful approximations of solutions of the stochastic model.}, language = {en} } @article{PerscheidSiegmundTaeumeletal.2017, author = {Perscheid, Michael and Siegmund, Benjamin and Taeumel, Marcel and Hirschfeld, Robert}, title = {Studying the advancement in debugging practice of professional software developers}, series = {Software Quality Journal}, volume = {25}, journal = {Software Quality Journal}, publisher = {Springer}, address = {Dordrecht}, issn = {0963-9314}, doi = {10.1007/s11219-015-9294-2}, pages = {83 -- 110}, year = {2017}, abstract = {In 1997, Henry Lieberman stated that debugging is the dirty little secret of computer science. Since then, several promising debugging technologies have been developed such as back-in-time debuggers and automatic fault localization methods. However, the last study about the state-of-the-art in debugging is still more than 15 years old and so it is not clear whether these new approaches have been applied in practice or not. For that reason, we investigate the current state of debugging in a comprehensive study. First, we review the available literature and learn about current approaches and study results. Second, we observe several professional developers while debugging and interview them about their experiences. Third, we create a questionnaire that serves as the basis for a larger online debugging survey. 
Based on these results, we present new insights into debugging practice that help to suggest new directions for future research.}, language = {en} } @article{MenningGrasnickEwaldetal.2018, author = {Menning, Axel and Grasnick, Bastien M. and Ewald, Benedikt and Dobrigkeit, Franziska and Nicolai, Claudia}, title = {Verbal focus shifts}, series = {Design Studies}, volume = {57}, journal = {Design Studies}, publisher = {Elsevier}, address = {Oxford}, issn = {0142-694X}, doi = {10.1016/j.destud.2018.03.003}, pages = {135 -- 155}, year = {2018}, abstract = {Previous studies on design behaviour indicate that focus shifts positively influence ideational productivity. In this study we want to take a closer look at how these focus shifts look on the verbal level. We describe a mutually influencing relationship between mental focus shifts and verbal low coherent statements. In a case study based on the DTRS11 dataset we identify 297 low coherent statements via a combined topic modelling and manual approach. We introduce a categorization of the different instances of low coherent statements. The results indicate that designers tend to shift topics within an existing design issue instead of completely disrupting it. (C) 2018 Elsevier Ltd. All rights reserved.}, language = {en} } @article{YousfiHeweltBaueretal.2018, author = {Yousfi, Alaaeddine and Hewelt, Marcin and Bauer, Christine and Weske, Mathias}, title = {Toward uBPMN-Based patterns for modeling ubiquitous business processes}, series = {IEEE Transactions on Industrial Informatics}, volume = {14}, journal = {IEEE Transactions on Industrial Informatics}, number = {8}, publisher = {Inst. of Electr. and Electronics Engineers}, address = {Piscataway}, issn = {1551-3203}, doi = {10.1109/TII.2017.2777847}, pages = {3358 -- 3367}, year = {2018}, abstract = {Ubiquitous business processes are the new generation of processes that pervade the physical space and interact with their environments using a minimum of human involvement. 
Although they are now widely deployed in the industry, their deployment is still ad hoc. They are implemented after an arbitrary modeling phase or no modeling phase at all. The absence of a solid modeling phase backing up the implementation generates many loopholes that are stressed in the literature. Here, we tackle the issue of modeling ubiquitous business processes. We propose patterns to represent the recent ubiquitous computing features. These patterns are the outcome of an analysis we conducted in the field of human-computer interaction to examine how the features are actually deployed. The patterns' understandability, ease-of-use, usefulness, and completeness are examined via a user experiment. The results indicate that these four indexes are on the positive track. Hence, the patterns may be the backbone of ubiquitous business process modeling in industrial applications.}, language = {en} } @article{NikajWeskeMendling2019, author = {Nikaj, Adriatik and Weske, Mathias and Mendling, Jan}, title = {Semi-automatic derivation of RESTful choreographies from business process choreographies}, series = {Software and systems modeling}, volume = {18}, journal = {Software and systems modeling}, number = {2}, publisher = {Springer}, address = {Heidelberg}, issn = {1619-1366}, doi = {10.1007/s10270-017-0653-2}, pages = {1195 -- 1208}, year = {2019}, abstract = {Enterprises reach out for collaborations with other organizations in order to offer complex products and services to the market. Such collaboration and coordination between different organizations, for a good share, is facilitated by information technology. The BPMN process choreography is a modeling language for specifying the exchange of information and services between different organizations at the business level. Recently, there is a surging use of the REST architectural style for the provisioning of services on the web, but few systematic engineering approaches to design their collaboration. 
In this paper, we address this gap in a comprehensive way by defining a semi-automatic method for the derivation of RESTful choreographies from process choreographies. The method is based on natural language analysis techniques to derive interactions from the textual information in process choreographies. The proposed method is evaluated in terms of effectiveness resulting in the intervention of a web engineer in only about 10\% of all generated RESTful interactions.}, language = {en} } @article{PufahlWeske2019, author = {Pufahl, Luise and Weske, Mathias}, title = {Batch activity: enhancing business process modeling and enactment with batch processing}, series = {Computing}, volume = {101}, journal = {Computing}, number = {12}, publisher = {Springer}, address = {Wien}, issn = {0010-485X}, doi = {10.1007/s00607-019-00717-4}, pages = {1909 -- 1933}, year = {2019}, abstract = {Organizations strive for efficiency in their business processes by process improvement and automation. Business process management (BPM) supports these efforts by capturing business processes in process models serving as blueprint for a number of process instances. In BPM, process instances are typically considered running independently of each other. However, batch processing---the collective execution of several instances at specific process activities---is a common phenomenon in operational processes to reduce cost or time. Currently, batch processing is organized manually or hard-coded in software. For allowing stakeholders to explicitly represent their batch configurations in process models and their automatic execution, this paper provides a concept for batch activities and describes the corresponding execution semantics. The batch activity concept is evaluated in a two-step approach: a prototypical implementation in an existing BPM System proves its feasibility. Additionally, batch activities are applied to different use cases in a simulated environment. 
Its application implies cost-savings when a suitable batch configuration is selected. The batch activity concept contributes to practice by allowing the specification of batch work in process models and their automatic execution, and to research by extending the existing process modeling concepts.}, language = {en} } @article{BazhenovaZerbatoOlibonietal.2019, author = {Bazhenova, Ekaterina and Zerbato, Francesca and Oliboni, Barbara and Weske, Mathias}, title = {From BPMN process models to DMN decision models}, series = {Information systems}, volume = {83}, journal = {Information systems}, publisher = {Elsevier}, address = {Amsterdam}, issn = {0306-4379}, doi = {10.1016/j.is.2019.02.001}, pages = {69 -- 88}, year = {2019}, abstract = {The interplay between process and decision models plays a crucial role in business process management, as decisions may be based on running processes and affect process outcomes. Often process models include decisions that are encoded through process control flow structures and data flow elements, thus reducing process model maintainability. The Decision Model and Notation (DMN) was proposed to achieve separation of concerns and to possibly complement the Business Process Model and Notation (BPMN) for designing decisions related to process models. Nevertheless, deriving decision models from process models remains challenging, especially when the same data underlie both process and decision models. In this paper, we explore how and to which extent the data modeled in BPMN processes and used for decision-making may be represented in the corresponding DMN decision models. To this end, we identify a set of patterns that capture possible representations of data in BPMN processes and that can be used to guide the derivation of decision models related to existing process models. Throughout the paper we refer to real-world healthcare processes to show the applicability of the proposed approach. (C) 2019 Elsevier Ltd. 
All rights reserved.}, language = {en} } @misc{DiazMendezSchoelzel2018, author = {Diaz, Sergio and Mendez, Diego and Sch{\"o}lzel, Mario}, title = {Dynamic Gallager-Humblet-Spira Algorithm for Wireless Sensor Networks}, series = {2018 IEEE Colombian Conference on Communications and Computing (COLCOM)}, journal = {2018 IEEE Colombian Conference on Communications and Computing (COLCOM)}, publisher = {IEEE}, address = {New York}, isbn = {978-1-5386-6820-7}, pages = {6}, year = {2018}, abstract = {The problem of constructing and maintaining a tree topology in a distributed manner is a challenging task in WSNs. This is because the nodes have limited computational and memory resources and the network changes over time. We propose the Dynamic Gallager-Humblet-Spira (D-GHS) algorithm that builds and maintains a minimum spanning tree. To do so, we divide D-GHS into four phases, namely neighbor discovery, tree construction, data collection, and tree maintenance. In the neighbor discovery phase, the nodes collect information about their neighbors and the link quality. In the tree construction, D-GHS finds the minimum spanning tree by executing the Gallager-Humblet-Spira algorithm. In the data collection phase, the sink roots the minimum spanning tree at itself, and each node sends data packets. In the tree maintenance phase, the nodes repair the tree when communication failures occur. 
The emulation results show that D-GHS reduces the number of control messages and the energy consumption, at the cost of a slight increase in memory size and convergence time.}, language = {en} } @misc{BoissierKurzynski2018, author = {Boissier, Martin and Kurzynski, Daniel}, title = {Workload-Driven Horizontal Partitioning and Pruning for Large HTAP Systems}, series = {2018 IEEE 34th International Conference on Data Engineering Workshops (ICDEW)}, journal = {2018 IEEE 34th International Conference on Data Engineering Workshops (ICDEW)}, publisher = {IEEE}, address = {New York}, isbn = {978-1-5386-6306-6}, doi = {10.1109/ICDEW.2018.00026}, pages = {116 -- 121}, year = {2018}, abstract = {Modern server systems with large NUMA architectures necessitate (i) data being distributed over the available computing nodes and (ii) NUMA-aware query processing to enable effective parallel processing in database systems. As these architectures incur significant latency and throughput penalties for accessing non-local data, queries should be executed as close as possible to the data. To further increase both performance and efficiency, data that is not relevant for the query result should be skipped as early as possible. One way to achieve this goal is horizontal partitioning to improve static partition pruning. As part of our ongoing work on workload-driven partitioning, we have implemented a recent approach called aggressive data skipping and extended it to handle both analytical as well as transactional access patterns. In this paper, we evaluate this approach with the workload and data of a production enterprise system of a Global 2000 company. 
The results show that over 80\% of all tuples can be skipped on average while the resulting partitioning schemata are surprisingly stable over time.}, language = {en} } @misc{PatalasMaliszewskaKrebs2018, author = {Patalas-Maliszewska, Justyna and Krebs, Irene}, title = {An Information System Supporting the Eliciting of Expert Knowledge for Successful IT Projects}, series = {Information and Software Technologies, ICIST 2018}, volume = {920}, journal = {Information and Software Technologies, ICIST 2018}, publisher = {Springer}, address = {Berlin}, isbn = {978-3-319-99972-2}, issn = {1865-0929}, doi = {10.1007/978-3-319-99972-2_1}, pages = {3 -- 13}, year = {2018}, abstract = {In order to guarantee the success of an IT project, it is necessary for a company to possess expert knowledge. The difficulty arises when experts no longer work for the company and it then becomes necessary to use their knowledge, in order to realise an IT project. In this paper, the ExKnowIT information system which supports the eliciting of expert knowledge for successful IT projects, is presented and consists of the following modules: (1) the identification of experts for successful IT projects, (2) the eliciting of expert knowledge on completed IT projects, (3) the expert knowledge base on completed IT projects, (4) the Group Method for Data Handling (GMDH) algorithm, (5) new knowledge in support of decisions regarding the selection of a manager for a new IT project. 
The added value of our system is that these three approaches, namely, the elicitation of expert knowledge, the success of an IT project and the discovery of new knowledge, gleaned from the expert knowledge base, otherwise known as the decision model, complement each other.}, language = {en} } @misc{IonBaudisch2018, author = {Ion, Alexandra and Baudisch, Patrick Markus}, title = {Metamaterial Devices}, publisher = {Association for Computing Machinery}, address = {New York}, isbn = {978-1-4503-5819-4}, doi = {10.1145/3214822.3214827}, pages = {2}, year = {2018}, abstract = {In our hands-on demonstration, we show several objects, the functionality of which is defined by the objects' internal micro-structure. Such metamaterial machines can (1) be mechanisms based on their microstructures, (2) employ simple mechanical computation, or (3) change their outside to interact with their environment. They are 3D printed from one piece and we support their creation by providing interactive software tools.}, language = {en} } @misc{BazhenovaZerbatoWeske2018, author = {Bazhenova, Ekaterina and Zerbato, Francesca and Weske, Mathias}, title = {Data-Centric Extraction of DMN Decision Models from BPMN Process Models}, series = {Business Process Management Workshops}, volume = {308}, journal = {Business Process Management Workshops}, publisher = {Springer}, address = {Berlin}, isbn = {978-3-319-74030-0}, issn = {1865-1348}, doi = {10.1007/978-3-319-74030-0_43}, pages = {542 -- 555}, year = {2018}, abstract = {Operational decisions in business processes can be modeled by using the Decision Model and Notation (DMN). The complementary use of DMN for decision modeling and of the Business Process Model and Notation (BPMN) for process design realizes the separation of concerns principle. For supporting separation of concerns during the design phase, it is crucial to understand which aspects of decision-making enclosed in a process model should be captured by a dedicated decision model. 
Whereas existing work focuses on the extraction of decision models from process control flow, the connection of process-related data and decision models is still unexplored. In this paper, we investigate how process-related data used for making decisions can be represented in process models and we distinguish a set of BPMN patterns capturing such information. Then, we provide a formal mapping of the identified BPMN patterns to corresponding DMN models and apply our approach to a real-world healthcare process.}, language = {en} } @article{ReinTaeumelHirschfeld2017, author = {Rein, Patrick and Taeumel, Marcel and Hirschfeld, Robert}, title = {Making the domain tangible}, series = {Design Thinking Research}, journal = {Design Thinking Research}, publisher = {Springer}, address = {New York}, isbn = {978-3-319-60967-6}, doi = {10.1007/978-3-319-60967-6_9}, pages = {171 -- 194}, year = {2017}, abstract = {Programmers collaborate continuously with domain experts to explore the problem space and to shape a solution that fits the users' needs. In doing so, all parties develop a shared vocabulary, which is above all a list of named concepts and their relationships to each other. Nowadays, many programmers favor object-oriented programming because it allows them to directly represent real-world concepts and interactions from the vocabulary as code. However, when existing domain data is not yet represented as objects, it becomes a challenge to initially bring existing domain data into object-oriented systems and to keep the source code readable. While source code might be comprehensible to programmers, domain experts can struggle, given their non-programming background. We present a new approach to provide a mapping of existing data sources into the object-oriented programming environment. We support keeping the code of the domain model compact and readable while adding implicit means to access external information as internal domain objects. 
This should encourage programmers to explore different ways to build the software system quickly. Eventually, our approach fosters communication with the domain experts, especially at the beginning of a project. When the details in the problem space are not yet clear, the source code provides a valuable, tangible communication artifact.}, language = {en} } @misc{PufahlWongWeske2018, author = {Pufahl, Luise and Wong, Tsun Yin and Weske, Mathias}, title = {Design of an extensible BPMN process simulator}, series = {Business Process Management Workshops (BPM 2017)}, volume = {308}, journal = {Business Process Management Workshops (BPM 2017)}, publisher = {Springer}, address = {Berlin}, isbn = {978-3-319-74030-0}, issn = {1865-1348}, doi = {10.1007/978-3-319-74030-0_62}, pages = {782 -- 795}, year = {2018}, abstract = {Business process simulation is an important means for quantitative analysis of a business process and to compare different process alternatives. With the Business Process Model and Notation (BPMN) being the state-of-the-art language for the graphical representation of business processes, many existing process simulators support already the simulation of BPMN diagrams. However, they do not provide well-defined interfaces to integrate new concepts in the simulation environment. In this work, we present the design and architecture of a proof-of-concept implementation of an open and extensible BPMN process simulator. It also supports the simulation of multiple BPMN processes at a time and relies on the building blocks of the well-founded discrete event simulation. The extensibility is assured by a plug-in concept. 
Its feasibility is demonstrated by extensions supporting new BPMN concepts, such as the simulation of business rule activities referencing decision models and batch activities.}, language = {en} } @article{RischKrestel2019, author = {Risch, Julian and Krestel, Ralf}, title = {Domain-specific word embeddings for patent classification}, series = {Data Technologies and Applications}, volume = {53}, journal = {Data Technologies and Applications}, number = {1}, publisher = {Emerald Group Publishing Limited}, address = {Bingley}, issn = {2514-9288}, doi = {10.1108/DTA-01-2019-0002}, pages = {108 -- 122}, year = {2019}, abstract = {Purpose Patent offices and other stakeholders in the patent domain need to classify patent applications according to a standardized classification scheme. The purpose of this paper is to examine the novelty of an application it can then be compared to previously granted patents in the same class. Automatic classification would be highly beneficial, because of the large volume of patents and the domain-specific knowledge needed to accomplish this costly manual task. However, a challenge for the automation is patent-specific language use, such as special vocabulary and phrases. Design/methodology/approach To account for this language use, the authors present domain-specific pre-trained word embeddings for the patent domain. The authors train the model on a very large data set of more than 5m patents and evaluate it at the task of patent classification. To this end, the authors propose a deep learning approach based on gated recurrent units for automatic patent classification built on the trained word embeddings. Findings Experiments on a standardized evaluation data set show that the approach increases average precision for patent classification by 17 percent compared to state-of-the-art approaches. In this paper, the authors further investigate the model's strengths and weaknesses. 
An extensive error analysis reveals that the learned embeddings indeed mirror patent-specific language use. The imbalanced training data and underrepresented classes are the most difficult remaining challenge. Originality/value The proposed approach fulfills the need for domain-specific word embeddings for downstream tasks in the patent domain, such as patent classification or patent analysis.}, language = {en} } @article{MoeringdeMutiis2019, author = {M{\"o}ring, Sebastian and de Mutiis, Marco}, title = {Camera Ludica}, series = {Intermedia games - Games inter media : Video games and intermediality}, journal = {Intermedia games - Games inter media : Video games and intermediality}, publisher = {Bloomsbury academic}, address = {New York}, isbn = {978-1-5013-3051-3}, pages = {69 -- 93}, year = {2019}, language = {en} } @misc{Matthies2019, author = {Matthies, Christoph}, title = {Agile process improvement in retrospectives}, series = {41st International Conference on Software Engineering: Companion Proceedings (ICSE-Companion)}, journal = {41st International Conference on Software Engineering: Companion Proceedings (ICSE-Companion)}, publisher = {IEEE}, address = {New York}, isbn = {978-1-7281-1764-5}, issn = {2574-1934}, doi = {10.1109/ICSE-Companion.2019.00063}, pages = {150 -- 152}, year = {2019}, abstract = {Working in iterations and repeatedly improving team workflows based on collected feedback is fundamental to agile software development processes. Scrum, the most popular agile method, provides dedicated retrospective meetings to reflect on the last development iteration and to decide on process improvement actions. However, agile methods do not prescribe how these improvement actions should be identified, managed or tracked in detail. The approaches to detect and remove problems in software development processes are therefore often based on intuition and prior experiences and perceptions of team members. 
Previous research in this area has focused on approaches to elicit a team's improvement opportunities as well as measurements regarding the work performed in an iteration, e.g. Scrum burn-down charts. Little research deals with the quality and nature of identified problems or how progress towards removing issues is measured. In this research, we investigate how agile development teams in the professional software industry organize their feedback and process improvement approaches. In particular, we focus on the structure and content of improvement and reflection meetings, i.e. retrospectives, and their outcomes. Researching how the vital mechanism of process improvement is implemented in practice in modern software development leads to a more complete picture of agile process improvement.}, language = {en} } @misc{Matthies2019b, author = {Matthies, Christoph}, title = {Feedback in Scrum}, series = {2019 IEEE/ACM 41st International Conference on Software Engineering: Companion Proceedings (ICSE-Companion)}, journal = {2019 IEEE/ACM 41st International Conference on Software Engineering: Companion Proceedings (ICSE-Companion)}, publisher = {IEEE}, address = {New York}, isbn = {978-1-7281-1764-5}, issn = {2574-1934}, doi = {10.1109/ICSE-Companion.2019.00081}, pages = {198 -- 201}, year = {2019}, abstract = {Improving the way that teams work together by reflecting and improving the executed process is at the heart of agile processes. The idea of iterative process improvement takes various forms in different agile development methodologies, e.g. Scrum Retrospectives. However, these methods do not prescribe how improvement steps should be conducted in detail. In this research we investigate how agile software teams can use their development data, such as commits or tickets, created during regular development activities, to drive and track process improvement steps. 
Our previous research focused on data-informed process improvement in the context of student teams, where controlled circumstances and deep domain knowledge allowed creation and usage of specific process measures. Encouraged by positive results in this area, we investigate the process improvement approaches employed in industry teams. Researching how the vital mechanism of process improvement is implemented and how development data is already being used in practice in modern software development leads to a more complete picture of agile process improvement. It is the first step in enabling a data-informed feedback and improvement process, tailored to a team's context and based on the development data of individual teams.}, language = {en} } @misc{BrandGiese2019, author = {Brand, Thomas and Giese, Holger}, title = {Generic adaptive monitoring based on executed architecture runtime model queries and events}, series = {IEEE Xplore}, journal = {IEEE Xplore}, publisher = {IEEE}, address = {New York}, isbn = {978-1-7281-2731-6}, issn = {1949-3673}, doi = {10.1109/SASO.2019.00012}, pages = {17 -- 22}, year = {2019}, abstract = {Monitoring is a key functionality for automated decision making as it is performed by self-adaptive systems, too. Effective monitoring provides the relevant information on time. This can be achieved with exhaustive monitoring causing a high overhead consumption of economical and ecological resources. In contrast, our generic adaptive monitoring approach supports effectiveness with increased efficiency. Also, it adapts to changes regarding the information demand and the monitored system without additional configuration and software implementation effort. The approach observes the executions of runtime model queries and processes change events to determine the currently required monitoring configuration. 
In this paper we explicate different possibilities to use the approach and evaluate their characteristics regarding the phenomenon detection time and the monitoring effort. Our approach allows balancing between those two characteristics. This makes it an interesting option for the monitoring function of self-adaptive systems because for them usually very short-lived phenomena are not relevant.}, language = {en} } @misc{BruechnerRenzKlingbeil2019, author = {Bruechner, Dominik and Renz, Jan and Klingbeil, Mandy}, title = {Creating a Framework for User-Centered Development and Improvement of Digital Education}, series = {Scale}, journal = {Scale}, publisher = {Association for Computing Machinery}, address = {New York}, isbn = {978-1-4503-6804-9}, doi = {10.1145/3330430.3333644}, pages = {4}, year = {2019}, abstract = {We investigate how the technology acceptance and learning experience of the digital education platform HPI Schul-Cloud (HPI School Cloud) for German secondary school teachers can be improved by proposing a user-centered research and development framework. We highlight the importance of developing digital learning technologies in a user-centered way to take differences in the requirements of educators and students into account. We suggest applying qualitative and quantitative methods to build a solid understanding of a learning platform's users, their needs, requirements, and their context of use. After concept development and idea generation of features and areas of opportunity based on the user research, we emphasize on the application of a multi-attribute utility analysis decision-making framework to prioritize ideas rationally, taking results of user research into account. Afterward, we recommend applying the principle build-learn-iterate to build prototypes in different resolutions while learning from user tests and improving the selected opportunities. 
Last but not least, we propose an approach for continuous short- and long-term user experience controlling and monitoring, extending existing web- and learning analytics metrics.}, language = {en} } @misc{BiloFriedrichLenzneretal.2019, author = {Bilo, Davide and Friedrich, Tobias and Lenzner, Pascal and Melnichenko, Anna}, title = {Geometric Network Creation Games}, series = {SPAA '19: The 31st ACM Symposium on Parallelism in Algorithms and Architectures}, journal = {SPAA '19: The 31st ACM Symposium on Parallelism in Algorithms and Architectures}, publisher = {Association for Computing Machinery}, address = {New York}, isbn = {978-1-4503-6184-2}, doi = {10.1145/3323165.3323199}, pages = {323 -- 332}, year = {2019}, abstract = {Network Creation Games are a well-known approach for explaining and analyzing the structure, quality and dynamics of real-world networks like the Internet and other infrastructure networks which evolved via the interaction of selfish agents without a central authority. In these games selfish agents which correspond to nodes in a network strategically buy incident edges to improve their centrality. However, past research on these games has only considered the creation of networks with unit-weight edges. In practice, e.g. when constructing a fiber-optic network, the choice of which nodes to connect and also the induced price for a link crucially depends on the distance between the involved nodes and such settings can be modeled via edge-weighted graphs. We incorporate arbitrary edge weights by generalizing the well-known model by Fabrikant et al. [PODC'03] to edge-weighted host graphs and focus on the geometric setting where the weights are induced by the distances in some metric space. 
In stark contrast to the state-of-the-art for the unit-weight version, where the Price of Anarchy is conjectured to be constant and where resolving this is a major open problem, we prove a tight non-constant bound on the Price of Anarchy for the metric version and a slightly weaker upper bound for the non-metric case. Moreover, we analyze the existence of equilibria, the computational hardness and the game dynamics for several natural metrics. The model we propose can be seen as the game-theoretic analogue of a variant of the classical Network Design Problem. Thus, low-cost equilibria of our game correspond to decentralized and stable approximations of the optimum network design.}, language = {en} } @misc{GonzalezLopezPufahl2019, author = {Gonzalez-Lopez, Fernanda and Pufahl, Luise}, title = {A Landscape for Case Models}, series = {Enterprise, Business-Process and Information Systems Modeling}, volume = {352}, journal = {Enterprise, Business-Process and Information Systems Modeling}, publisher = {Springer}, address = {Berlin}, isbn = {978-3-030-20618-5}, issn = {1865-1348}, doi = {10.1007/978-3-030-20618-5_6}, pages = {87 -- 102}, year = {2019}, abstract = {Case Management is a paradigm to support knowledge-intensive processes. The different approaches developed for modeling these types of processes tend to result in scattered models due to the low abstraction level at which the inherently complex processes are therein represented. Thus, readability and understandability is more challenging than that of traditional process models. By reviewing existing proposals in the field of process overviews and case models, this paper extends a case modeling language - the fragment-based Case Management (fCM) language - with the goal of modeling knowledge-intensive processes from a higher abstraction level - to generate a so-called fCM landscape. This proposal is empirically evaluated via an online experiment. 
Results indicate that interpreting an fCM landscape might be more effective and efficient than interpreting an informationally equivalent case model.}, language = {en} } @article{LambersBornKosioletal.2018, author = {Lambers, Leen and Born, Kristopher and Kosiol, Jens and Str{\"u}ber, Daniel and Taentzer, Gabriele}, title = {Granularity of conflicts and dependencies in graph transformation systems}, series = {Journal of Logical and Algebraic Methods in Programming}, volume = {103}, journal = {Journal of Logical and Algebraic Methods in Programming}, publisher = {Elsevier}, address = {New York}, issn = {2352-2208}, doi = {10.1016/j.jlamp.2018.11.004}, pages = {105 -- 129}, year = {2018}, abstract = {Conflict and dependency analysis (CDA) is a static analysis for the detection of conflicting and dependent rule applications in a graph transformation system. The state-of-the-art CDA technique, critical pair analysis, provides all potential conflicts and dependencies in minimal context as critical pairs, for each pair of rules. Yet, critical pairs can be hard to understand; users are mainly interested in core information about conflicts and dependencies occurring in various combinations. In this paper, we present an approach to conflicts and dependencies in graph transformation systems based on two dimensions of granularity. The first dimension refers to the overlap considered between the rules of a given rule pair; the second one refers to the represented amount of context information about transformations in which the conflicts occur. We introduce a variety of new conflict notions, in particular, conflict atoms, conflict reasons, and minimal conflict reasons, relate them to the existing conflict notions of critical pairs and initial conflicts, and position all of these notions within our granularity approach. Finally, we introduce dual concepts for dependency analysis. As we discuss in a running example, our approach paves the way for an improved CDA technique. 
(C) 2018 Elsevier Inc. All rights reserved.}, language = {en} } @article{SchlosserWaltherBoissieretal.2019, author = {Schlosser, Rainer and Walther, Carsten and Boissier, Martin and Uflacker, Matthias}, title = {Automated repricing and ordering strategies in competitive markets}, series = {AI communications : AICOM ; the European journal on artificial intelligence}, volume = {32}, journal = {AI communications : AICOM ; the European journal on artificial intelligence}, number = {1}, publisher = {IOS Press}, address = {Amsterdam}, issn = {0921-7126}, doi = {10.3233/AIC-180603}, pages = {15 -- 29}, year = {2019}, abstract = {Merchants on modern e-commerce platforms face a highly competitive environment. They compete against each other using automated dynamic pricing and ordering strategies. Successfully managing both inventory levels as well as offer prices is a challenging task as (i) demand is uncertain, (ii) competitors strategically interact, and (iii) optimized pricing and ordering decisions are mutually dependent. We show how to derive optimized data-driven pricing and ordering strategies which are based on demand learning techniques and efficient dynamic optimization models. We verify the superior performance of our self-adaptive strategies by comparing them to different rule-based as well as data-driven strategies in duopoly and oligopoly settings. Further, to study and to optimize joint dynamic ordering and pricing strategies on online marketplaces, we built an interactive simulation platform. 
To be both flexible and scalable, the platform has a microservice-based architecture and allows handling dozens of competing merchants and streams of consumers with configurable characteristics.}, language = {en} } @misc{HalfpapSchlosser2019, author = {Halfpap, Stefan and Schlosser, Rainer}, title = {Workload-Driven Fragment Allocation for Partially Replicated Databases Using Linear Programming}, series = {2019 IEEE 35th International Conference on Data Engineering (ICDE)}, journal = {2019 IEEE 35th International Conference on Data Engineering (ICDE)}, publisher = {IEEE}, address = {New York}, isbn = {978-1-5386-7474-1}, issn = {1084-4627}, doi = {10.1109/ICDE.2019.00188}, pages = {1746 -- 1749}, year = {2019}, abstract = {In replication schemes, replica nodes can process read-only queries on snapshots of the master node without violating transactional consistency. By analyzing the workload, we can identify query access patterns and replicate data depending to its access frequency. In this paper, we define a linear programming (LP) model to calculate the set of partial replicas with the lowest overall memory capacity while evenly balancing the query load. Furthermore, we propose a scalable decomposition heuristic to calculate solutions for larger problem sizes. 
While guaranteeing the same performance as state-of-the-art heuristics, our decomposition approach calculates allocations with up to 23\% lower memory footprint for the TPC-H benchmark.}, language = {en} } @misc{HalfpapSchlosser2019b, author = {Halfpap, Stefan and Schlosser, Rainer}, title = {A Comparison of Allocation Algorithms for Partially Replicated Databases}, series = {2019 IEEE 35th International Conference on Data Engineering (ICDE)}, journal = {2019 IEEE 35th International Conference on Data Engineering (ICDE)}, publisher = {IEEE}, address = {New York}, isbn = {978-1-5386-7474-1}, issn = {1084-4627}, doi = {10.1109/ICDE.2019.00226}, pages = {2008 -- 2011}, year = {2019}, abstract = {Increasing demand for analytical processing capabilities can be managed by replication approaches. However, to evenly balance the replicas' workload shares while at the same time minimizing the data replication factor is a highly challenging allocation problem. As optimal solutions are only applicable for small problem instances, effective heuristics are indispensable. In this paper, we test and compare state-of-the-art allocation algorithms for partial replication. By visualizing and exploring their (heuristic) solutions for different benchmark workloads, we are able to derive structural insights and to detect an algorithm's strengths as well as its potential for improvement. 
Further, our application enables end-to-end evaluations of different allocations to verify their theoretical performance.}, language = {en} } @article{FriedrichKrejcaRothenbergeretal.2019, author = {Friedrich, Tobias and Krejca, Martin Stefan and Rothenberger, Ralf and Arndt, Tobias and Hafner, Danijar and Kellermeier, Thomas and Krogmann, Simon and Razmjou, Armin}, title = {Routing for on-street parking search using probabilistic data}, series = {AI communications : AICOM ; the European journal on artificial intelligence}, volume = {32}, journal = {AI communications : AICOM ; the European journal on artificial intelligence}, number = {2}, publisher = {IOS Press}, address = {Amsterdam}, issn = {0921-7126}, doi = {10.3233/AIC-180574}, pages = {113 -- 124}, year = {2019}, abstract = {A significant percentage of urban traffic is caused by the search for parking spots. One possible approach to improve this situation is to guide drivers along routes which are likely to have free parking spots. The task of finding such a route can be modeled as a probabilistic graph problem which is NP-complete. Thus, we propose heuristic approaches for solving this problem and evaluate them experimentally. For this, we use probabilities of finding a parking spot, which are based on publicly available empirical data from TomTom International B.V. Additionally, we propose a heuristic that relies exclusively on conventional road attributes. Our experiments show that this algorithm comes close to the baseline by a factor of 1.3 in our cost measure. 
Last, we complement our experiments with results from a field study, comparing the success rates of our algorithms against real human drivers.}, language = {en} } @misc{ChakrabortyHammerBugiel2019, author = {Chakraborty, Dhiman and Hammer, Christian and Bugiel, Sven}, title = {Secure Multi-Execution in Android}, series = {Proceedings of the 34th ACM/SIGAPP Symposium on Applied Computing}, journal = {Proceedings of the 34th ACM/SIGAPP Symposium on Applied Computing}, publisher = {Association for Computing Machinery}, address = {New York}, isbn = {978-1-4503-5933-7}, doi = {10.1145/3297280.3297469}, pages = {1934 -- 1943}, year = {2019}, abstract = {Mobile operating systems, such as Google's Android, have become a fixed part of our daily lives and are entrusted with a plethora of private information. Congruously, their data protection mechanisms have been improved steadily over the last decade and, in particular, for Android, the research community has explored various enhancements and extensions to the access control model. However, the vast majority of those solutions has been concerned with controlling the access to data, but equally important is the question of how to control the flow of data once released. Ignoring control over the dissemination of data between applications or between components of the same app, opens the door for attacks, such as permission re-delegation or privacy-violating third-party libraries. Controlling information flows is a long-standing problem, and one of the most recent and practical-oriented approaches to information flow control is secure multi-execution. In this paper, we present Ariel, the design and implementation of an IFC architecture for Android based on the secure multi-execution of apps. 
Ariel demonstrably extends Android's system with support for executing multiple instances of apps, and it is equipped with a policy lattice derived from the protection levels of Android's permissions as well as an I/O scheduler to achieve control over data flows between application instances. We demonstrate how secure multi-execution with Ariel can help to mitigate two prominent attacks on Android, permission re-delegations and malicious advertisement libraries.}, language = {en} } @misc{WelearegaiSchlueterHammer2019, author = {Welearegai, Gebrehiwet B. and Schlueter, Max and Hammer, Christian}, title = {Static security evaluation of an industrial web application}, series = {Proceedings of the 34th ACM/SIGAPP Symposium on Applied Computing}, journal = {Proceedings of the 34th ACM/SIGAPP Symposium on Applied Computing}, publisher = {Association for Computing Machinery}, address = {New York}, isbn = {978-1-4503-5933-7}, doi = {10.1145/3297280.3297471}, pages = {1952 -- 1961}, year = {2019}, abstract = {JavaScript is the most popular programming language for web applications. Static analysis of JavaScript applications is highly challenging due to its dynamic language constructs and event-driven asynchronous executions, which also give rise to many security-related bugs. Several static analysis tools to detect such bugs exist, however, research has not yet reported much on the precision and scalability trade-off of these analyzers. As a further obstacle, JavaScript programs structured in Node.js modules need to be collected for analysis, but existing bundlers are either specific to their respective analysis tools or not particularly suitable for static analysis.}, language = {en} }
@misc{Friedrich2019, author = {Friedrich, Tobias}, title = {From graph theory to network science}, series = {36th International Symposium on Theoretical Aspects of Computer Science (STACS 2019)}, volume = {126}, journal = {36th International Symposium on Theoretical Aspects of Computer Science (STACS 2019)}, publisher = {Schloss Dagstuhl-Leibniz-Zentrum f{\"u}r Informatik}, address = {Dagstuhl}, isbn = {978-3-95977-100-9}, doi = {10.4230/LIPIcs.STACS.2019.5}, pages = {9}, year = {2019}, abstract = {Network science is driven by the question which properties large real-world networks have and how we can exploit them algorithmically. In the past few years, hyperbolic graphs have emerged as a very promising model for scale-free networks. The connection between hyperbolic geometry and complex networks gives insights in both directions: (1) Hyperbolic geometry forms the basis of a natural and explanatory model for real-world networks. Hyperbolic random graphs are obtained by choosing random points in the hyperbolic plane and connecting pairs of points that are geometrically close. The resulting networks share many structural properties for example with online social networks like Facebook or Twitter. They are thus well suited for algorithmic analyses in a more realistic setting. (2) Starting with a real-world network, hyperbolic geometry is well-suited for metric embeddings. The vertices of a network can be mapped to points in this geometry, such that geometric distances are similar to graph distances. 
Such embeddings have a variety of algorithmic applications ranging from approximations based on efficient geometric algorithms to greedy routing solely using hyperbolic coordinates for navigation decisions.}, language = {en} } @misc{BrandGiese2019b, author = {Brand, Thomas and Giese, Holger Burkhard}, title = {Towards Generic Adaptive Monitoring}, series = {2018 IEEE 12th International Conference on Self-Adaptive and Self-Organizing Systems (SASO)}, journal = {2018 IEEE 12th International Conference on Self-Adaptive and Self-Organizing Systems (SASO)}, publisher = {IEEE}, address = {New York}, isbn = {978-1-5386-5172-8}, issn = {1949-3673}, doi = {10.1109/SASO.2018.00027}, pages = {156 -- 161}, year = {2019}, abstract = {Monitoring is a key prerequisite for self-adaptive software and many other forms of operating software. Monitoring relevant lower level phenomena like the occurrences of exceptions and diagnosis data requires to carefully examine which detailed information is really necessary and feasible to monitor. Adaptive monitoring permits observing a greater variety of details with less overhead, if most of the time the MAPE-K loop can operate using only a small subset of all those details. However, engineering such an adaptive monitoring is a major engineering effort on its own that further complicates the development of self-adaptive software. The proposed approach overcomes the outlined problems by providing generic adaptive monitoring via runtime models. It reduces the effort to introduce and apply adaptive monitoring by avoiding additional development effort for controlling the monitoring adaptation. 
Although the generic approach is independent from the monitoring purpose, it still allows for substantial savings regarding the monitoring resource consumption as demonstrated by an example.}, language = {en} } @misc{PlauthPolze2018, author = {Plauth, Max and Polze, Andreas}, title = {Towards improving data transfer efficiency for accelerators using hardware compression}, series = {Sixth International Symposium on Computing and Networking Workshops (CANDARW)}, journal = {Sixth International Symposium on Computing and Networking Workshops (CANDARW)}, publisher = {IEEE}, address = {New York}, isbn = {978-1-5386-9184-7}, doi = {10.1109/CANDARW.2018.00031}, pages = {125 -- 131}, year = {2018}, abstract = {The overhead of moving data is the major limiting factor in todays hardware, especially in heterogeneous systems where data needs to be transferred frequently between host and accelerator memory. With the increasing availability of hardware-based compression facilities in modern computer architectures, this paper investigates the potential of hardware-accelerated I/O Link Compression as a promising approach to reduce data volumes and transfer time, thus improving the overall efficiency of accelerators in heterogeneous systems. Our considerations are focused on On-the-Fly compression in both Single-Node and Scale-Out deployments. Based on a theoretical analysis, this paper demonstrates the feasibility of hardware-accelerated On-the-Fly I/O Link Compression for many workloads in a Scale-Out scenario, and for some even in a Single-Node scenario. 
These findings are confirmed in a preliminary evaluation using software- and hardware-based implementations of the 842 compression algorithm.}, language = {en} } @misc{MatthiesTeusnerHesse2018, author = {Matthies, Christoph and Teusner, Ralf and Hesse, G{\"u}nter}, title = {Beyond Surveys}, series = {2018 IEEE Frontiers in Education (FIE) Conference}, journal = {2018 IEEE Frontiers in Education (FIE) Conference}, publisher = {IEEE}, address = {New York}, isbn = {978-1-5386-1174-6}, issn = {0190-5848}, pages = {9}, year = {2018}, language = {en} } @misc{TeusnerMatthiesStaubitz2018, author = {Teusner, Ralf and Matthies, Christoph and Staubitz, Thomas}, title = {What Stays in Mind?}, series = {IEEE Frontiers in Education Conference (FIE)}, journal = {IEEE Frontiers in Education Conference (FIE)}, publisher = {IEEE}, address = {New York}, isbn = {978-1-5386-1174-6}, issn = {0190-5848}, doi = {10.1109/FIE.2018.8658890}, pages = {9}, year = {2018}, language = {en} } @misc{RepkeKrestelEddingetal.2018, author = {Repke, Tim and Krestel, Ralf and Edding, Jakob and Hartmann, Moritz and Hering, Jonas and Kipping, Dennis and Schmidt, Hendrik and Scordialo, Nico and Zenner, Alexander}, title = {Beacon in the Dark}, series = {Proceedings of the 27th ACM International Conference on Information and Knowledge Management}, journal = {Proceedings of the 27th ACM International Conference on Information and Knowledge Management}, publisher = {Association for Computing Machinery}, address = {New York}, isbn = {978-1-4503-6014-2}, doi = {10.1145/3269206.3269231}, pages = {1871 -- 1874}, year = {2018}, abstract = {The large amount of heterogeneous data in these email corpora renders experts' investigations by hand infeasible. Auditors or journalists, e.g., who are looking for irregular or inappropriate content or suspicious patterns, are in desperate need for computer-aided exploration tools to support their investigations. 
We present our Beacon system for the exploration of such corpora at different levels of detail. A distributed processing pipeline combines text mining methods and social network analysis to augment the already semi-structured nature of emails. The user interface ties into the resulting cleaned and enriched dataset. For the interface design we identify three objectives expert users have: gain an initial overview of the data to identify leads to investigate, understand the context of the information at hand, and have meaningful filters to iteratively focus onto a subset of emails. To this end we make use of interactive visualisations based on rearranged and aggregated extracted information to reveal salient patterns.}, language = {en} } @misc{LosterNaumannEhmuelleretal.2018, author = {Loster, Michael and Naumann, Felix and Ehmueller, Jan and Feldmann, Benjamin}, title = {CurEx}, series = {Proceedings of the 27th ACM International Conference on Information and Knowledge Management}, journal = {Proceedings of the 27th ACM International Conference on Information and Knowledge Management}, publisher = {Association for Computing Machinery}, address = {New York}, isbn = {978-1-4503-6014-2}, doi = {10.1145/3269206.3269229}, pages = {1883 -- 1886}, year = {2018}, abstract = {The integration of diverse structured and unstructured information sources into a unified, domain-specific knowledge base is an important task in many areas. A well-maintained knowledge base enables data analysis in complex scenarios, such as risk analysis in the financial sector or investigating large data leaks, such as the Paradise or Panama papers. Both the creation of such knowledge bases, as well as their continuous maintenance and curation involves many complex tasks and considerable manual effort. With CurEx, we present a modular system that allows structured and unstructured data sources to be integrated into a domain-specific knowledge base. 
In particular, we (i) enable the incremental improvement of each individual integration component; (ii) enable the selective generation of multiple knowledge graphs from the information contained in the knowledge base; and (iii) provide two distinct user interfaces tailored to the needs of data engineers and end-users respectively. The former has curation capabilities and controls the integration process, whereas the latter focuses on the exploration of the generated knowledge graph.}, language = {en} } @misc{Matthies2018, author = {Matthies, Christoph}, title = {Scrum2kanban}, series = {Proceedings of the 2nd International Workshop on Software Engineering Education for Millennials}, journal = {Proceedings of the 2nd International Workshop on Software Engineering Education for Millennials}, publisher = {IEEE}, address = {New York}, isbn = {978-1-4503-5750-0}, doi = {10.1145/3194779.3194784}, pages = {48 -- 55}, year = {2018}, abstract = {Using university capstone courses to teach agile software development methodologies has become commonplace, as agile methods have gained support in professional software development. This usually means students are introduced to and work with the currently most popular agile methodology: Scrum. However, as the agile methods employed in the industry change and are adapted to different contexts, university courses must follow suit. A prime example of this is the Kanban method, which has recently gathered attention in the industry. In this paper, we describe a capstone course design, which adds the hands-on learning of the lean principles advocated by Kanban into a capstone project run with Scrum. This both ensures that students are aware of recent process frameworks and ideas as well as gain a more thorough overview of how agile methods can be employed in practice. We describe the details of the course and analyze the participating students' perceptions as well as our observations. 
We analyze the development artifacts, created by students during the course in respect to the two different development methodologies. We further present a summary of the lessons learned as well as recommendations for future similar courses. The survey conducted at the end of the course revealed an overwhelmingly positive attitude of students towards the integration of Kanban into the course.}, language = {en} } @misc{ArandaSchoelzelMendezetal.2018, author = {Aranda, Juan and Sch{\"o}lzel, Mario and Mendez, Diego and Carrillo, Henry}, title = {An energy consumption model for multiModal wireless sensor networks based on wake-up radio receivers}, series = {2018 IEEE Colombian Conference on Communications and Computing (COLCOM)}, journal = {2018 IEEE Colombian Conference on Communications and Computing (COLCOM)}, publisher = {IEEE}, address = {New York}, isbn = {978-1-5386-6820-7}, doi = {10.1109/ColComCon.2018.8466728}, pages = {6}, year = {2018}, abstract = {Energy consumption is a major concern in Wireless Sensor Networks. A significant waste of energy occurs due to the idle listening and overhearing problems, which are typically avoided by turning off the radio, while no transmission is ongoing. The classical approach for allowing the reception of messages in such situations is to use a low-duty-cycle protocol, and to turn on the radio periodically, which reduces the idle listening problem, but requires timers and usually unnecessary wakeups. A better solution is to turn on the radio only on demand by using a Wake-up Radio Receiver (WuRx). In this paper, an energy model is presented to estimate the energy saving in various multi-hop network topologies under several use cases, when a WuRx is used instead of a classical low-duty-cycling protocol. The presented model also allows for estimating the benefit of various WuRx properties like using addressing or not.}, language = {en} } @article{MarufuKayemWolthusen2018, author = {Marufu, Anesu M. C. 
and Kayem, Anne Voluntas dei Massah and Wolthusen, Stephen D.}, title = {The design and classification of cheating attacks on power marketing schemes in resource constrained smart micro-grids}, series = {Smart Micro-Grid Systems Security and Privacy}, volume = {71}, journal = {Smart Micro-Grid Systems Security and Privacy}, publisher = {Springer}, address = {Dordrecht}, isbn = {978-3-319-91427-5}, doi = {10.1007/978-3-319-91427-5_6}, pages = {103 -- 144}, year = {2018}, abstract = {In this chapter, we provide a framework to specify how cheating attacks can be conducted successfully on power marketing schemes in resource constrained smart micro-grids. This is an important problem because such cheating attacks can destabilise and in the worst case result in a breakdown of the micro-grid. We consider three aspects, in relation to modelling cheating attacks on power auctioning schemes. First, we aim to specify exactly how in spite of the resource constrained character of the micro-grid, cheating can be conducted successfully. Second, we consider how mitigations can be modelled to prevent cheating, and third, we discuss methods of maintaining grid stability and reliability even in the presence of cheating attacks. We use an Automated-Cheating-Attack (ACA) conception to build a taxonomy of cheating attacks based on the idea of adversarial acquisition of surplus energy. Adversarial acquisitions of surplus energy allow malicious users to pay less for access to more power than the quota allowed for the price paid. The impact on honest users, is the lack of an adequate supply of energy to meet power demand requests. 
We conclude with a discussion of the performance overhead of provoking, detecting, and mitigating such attacks efficiently.}, language = {en} } @misc{HaarmannBatoulisNikajetal.2018, author = {Haarmann, Stephan and Batoulis, Kimon and Nikaj, Adriatik and Weske, Mathias}, title = {DMN Decision Execution on the Ethereum Blockchain}, series = {Advanced Information Systems Engineering, CAISE 2018}, volume = {10816}, journal = {Advanced Information Systems Engineering, CAISE 2018}, publisher = {Springer}, address = {Cham}, isbn = {978-3-319-91563-0}, issn = {0302-9743}, doi = {10.1007/978-3-319-91563-0_20}, pages = {327 -- 341}, year = {2018}, abstract = {Recently blockchain technology has been introduced to execute interacting business processes in a secure and transparent way. While the foundations for process enactment on blockchain have been researched, the execution of decisions on blockchain has not been addressed yet. In this paper we argue that decisions are an essential aspect of interacting business processes, and, therefore, also need to be executed on blockchain. The immutable representation of decision logic can be used by the interacting processes, so that decision taking will be more secure, more transparent, and better auditable. The approach is based on a mapping of the DMN language S-FEEL to Solidity code to be run on the Ethereum blockchain. 
The work is evaluated by a proof-of-concept prototype and an empirical cost evaluation.}, language = {en} } @misc{GrossTiwariHammer2018, author = {Gross, Sascha and Tiwari, Abhishek and Hammer, Christian}, title = {PIAnalyzer}, series = {Computer Security(ESORICS 2018), PT II}, volume = {11099}, journal = {Computer Security(ESORICS 2018), PT II}, publisher = {Springer}, address = {Cham}, isbn = {978-3-319-98989-1}, issn = {0302-9743}, doi = {10.1007/978-3-319-98989-1_3}, pages = {41 -- 59}, year = {2018}, abstract = {In this work we propose PIAnalyzer, a novel approach to analyze PendingIntent related vulnerabilities. We empirically evaluate PIAnalyzer on a set of 1000 randomly selected applications from the Google Play Store and find 1358 insecure usages of PendingIntents, including 70 severe vulnerabilities. We manually inspected ten reported vulnerabilities out of which nine correctly reported vulnerabilities, indicating a high precision. The evaluation shows that PIAnalyzer is efficient with an average execution time of 13 seconds per application.}, language = {en} } @misc{GalkeGerstenkornScherp2018, author = {Galke, Lukas and Gerstenkorn, Gunnar and Scherp, Ansgar}, title = {A case study of closed-domain response suggestion with limited training data}, series = {Database and Expert Systems Applications : DEXA 2018 International workshops}, volume = {903}, journal = {Database and Expert Systems Applications : DEXA 2018 International workshops}, publisher = {Springer}, address = {Berlin}, isbn = {978-3-319-99133-7}, issn = {1865-0929}, doi = {10.1007/978-3-319-99133-7_18}, pages = {218 -- 229}, year = {2018}, abstract = {We analyze the problem of response suggestion in a closed domain along a real-world scenario of a digital library. We present a text-processing pipeline to generate question-answer pairs from chat transcripts. 
On this limited amount of training data, we compare retrieval-based, conditioned-generation, and dedicated representation learning approaches for response suggestion. Our results show that retrieval-based methods that strive to find similar, known contexts are preferable over parametric approaches from the conditioned-generation family, when the training data is limited. We, however, identify a specific representation learning approach that is competitive to the retrieval-based approaches despite the training data limitation.}, language = {en} } @article{YousfiBatoulisWeske2019, author = {Yousfi, Alaaeddine and Batoulis, Kimon and Weske, Mathias}, title = {Achieving Business Process Improvement via Ubiquitous Decision-Aware Business Processes}, series = {ACM Transactions on Internet Technology}, volume = {19}, journal = {ACM Transactions on Internet Technology}, number = {1}, publisher = {Association for Computing Machinery}, address = {New York}, issn = {1533-5399}, doi = {10.1145/3298986}, pages = {19}, year = {2019}, abstract = {Business process improvement is an endless challenge for many organizations. As long as there is a process, it must be improved. Nowadays, improvement initiatives are driven by professionals. This is no longer practical because people cannot perceive the enormous data of current business environments. Here, we introduce ubiquitous decision-aware business processes. They pervade the physical space, analyze the ever-changing environments, and make decisions accordingly. We explain how they can be built and used for improvement. 
Our approach can be a valuable improvement option to alleviate the workload of participants by helping focus on the crucial rather than the menial tasks.}, language = {en} } @article{SchneiderMaximovaSakizloglouetal.2021, author = {Schneider, Sven and Maximova, Maria and Sakizloglou, Lucas and Giese, Holger}, title = {Formal testing of timed graph transformation systems using metric temporal graph logic}, series = {International journal on software tools for technology transfer}, volume = {23}, journal = {International journal on software tools for technology transfer}, number = {3}, publisher = {Springer}, address = {Heidelberg}, issn = {1433-2779}, doi = {10.1007/s10009-020-00585-w}, pages = {411 -- 488}, year = {2021}, abstract = {Embedded real-time systems generate state sequences where time elapses between state changes. Ensuring that such systems adhere to a provided specification of admissible or desired behavior is essential. Formal model-based testing is often a suitable cost-effective approach. We introduce an extended version of the formalism of symbolic graphs, which encompasses types as well as attributes, for representing states of dynamic systems. Relying on this extension of symbolic graphs, we present a novel formalism of timed graph transformation systems (TGTSs) that supports the model-based development of dynamic real-time systems at an abstract level where possible state changes and delays are specified by graph transformation rules. We then introduce an extended form of the metric temporal graph logic (MTGL) with increased expressiveness to improve the applicability of MTGL for the specification of timed graph sequences generated by a TGTS. Based on the metric temporal operators of MTGL and its built-in graph binding mechanics, we express properties on the structure and attributes of graphs as well as on the occurrence of graphs over time that are related by their inner structure. 
We provide formal support for checking whether a single generated timed graph sequence adheres to a provided MTGL specification. Relying on this logical foundation, we develop a testing framework for TGTSs that are specified using MTGL. Lastly, we apply this testing framework to a running example by using our prototypical implementation in the tool AutoGraph.}, language = {en} } @misc{BrinkmannHeine2019, author = {Brinkmann, Maik and Heine, Moreen}, title = {Can Blockchain Leverage for New Public Governance?}, series = {Proceedings of the 12th International Conference on Theory and Practice of Electronic Governance}, journal = {Proceedings of the 12th International Conference on Theory and Practice of Electronic Governance}, publisher = {Association for Computing Machinery}, address = {New York}, isbn = {978-1-4503-6644-1}, doi = {10.1145/3326365.3326409}, pages = {338 -- 341}, year = {2019}, abstract = {New Public Governance (NPG) as a paradigm for collaborative forms of public service delivery and Blockchain governance are trending topics for researchers and practitioners alike. Thus far, each topic has, on the whole, been discussed separately. This paper presents the preliminary results of ongoing research which aims to shed light on the more concrete benefits of Blockchain for the purpose of NPG. For the first time, a conceptual analysis is conducted on process level to spot benefits and limitations of Blockchain-based governance. Per process element, Blockchain key characteristics are mapped to functional aspects of NPG from a governance perspective. The preliminary results show that Blockchain offers valuable support for governments seeking methods to effectively coordinate co-producing networks. However, the extent of benefits of Blockchain varies across the process elements. It becomes evident that there is a need for off-chain processes. 
It is, therefore, argued in favour of intensifying research on off-chain governance processes to better understand the implications for and influences on on-chain governance.}, language = {en} } @article{LadleifWeske2021, author = {Ladleif, Jan and Weske, Mathias}, title = {Which event happened first?}, series = {Frontiers in blockchain}, volume = {4}, journal = {Frontiers in blockchain}, publisher = {Frontiers in Blockchain}, address = {Lausanne, Schweiz}, issn = {2624-7852}, doi = {10.3389/fbloc.2021.758169}, pages = {1 -- 16}, year = {2021}, abstract = {First come, first served: Critical choices between alternative actions are often made based on events external to an organization, and reacting promptly to their occurrence can be a major advantage over the competition. In Business Process Management (BPM), such deferred choices can be expressed in process models, and they are an important aspect of process engines. Blockchain-based process execution approaches are no exception to this, but are severely limited by the inherent properties of the platform: The isolated environment prevents direct access to external entities and data, and the non-continual runtime based entirely on atomic transactions impedes the monitoring and detection of events. In this paper we provide an in-depth examination of the semantics of deferred choice, and transfer them to environments such as the blockchain. 
We introduce and compare several oracle architectures able to satisfy certain requirements, and show that they can be implemented using state-of-the-art blockchain technology.}, language = {en} } @phdthesis{Batoulis2019, author = {Batoulis, Kimon}, title = {Sound integration of process and decision models}, doi = {10.25932/publishup-43738}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-437386}, school = {Universit{\"a}t Potsdam}, pages = {xviii, 155}, year = {2019}, abstract = {Business process management is an established technique for business organizations to manage and support their processes. Those processes are typically represented by graphical models designed with modeling languages, such as the Business Process Model and Notation (BPMN). Since process models do not only serve the purpose of documentation but are also a basis for implementation and automation of the processes, they have to satisfy certain correctness requirements. In this regard, the notion of soundness of workflow nets was developed, that can be applied to BPMN process models in order to verify their correctness. Because the original soundness criteria are very restrictive regarding the behavior of the model, different variants of the soundness notion have been developed for situations in which certain violations are not even harmful. All of those notions do only consider the control-flow structure of a process model, however. This poses a problem, taking into account the fact that with the recent release and the ongoing development of the Decision Model and Notation (DMN) standard, an increasing number of process models are complemented by respective decision models. DMN is a dedicated modeling language for decision logic and separates the concerns of process and decision logic into two different models, process and decision models respectively. 
Hence, this thesis is concerned with the development of decision-aware soundness notions, i.e., notions of soundness that build upon the original soundness ideas for process models, but additionally take into account complementary decision models. Similar to the various notions of workflow net soundness, this thesis investigates different notions of decision soundness that can be applied depending on the desired degree of restrictiveness. Since decision tables are a standardized means of DMN to represent decision logic, this thesis also puts special focus on decision tables, discussing how they can be translated into an unambiguous format and how their possible output values can be efficiently determined. Moreover, a prototypical implementation is described that supports checking a basic version of decision soundness. The decision soundness notions were also empirically evaluated on models from participants of an online course on process and decision modeling as well as from a process management project of a large insurance company. The evaluation demonstrates that violations of decision soundness indeed occur and can be detected with our approach.}, language = {en} } @article{RischKrestel2020, author = {Risch, Julian and Krestel, Ralf}, title = {Toxic comment detection in online discussions}, series = {Deep learning-based approaches for sentiment analysis}, journal = {Deep learning-based approaches for sentiment analysis}, editor = {Agarwal, Basant and Nayak, Richi and Mittal, Namita and Patnaik, Srikanta}, publisher = {Springer}, address = {Singapore}, isbn = {978-981-15-1216-2}, issn = {2524-7565}, doi = {10.1007/978-981-15-1216-2_4}, pages = {85 -- 109}, year = {2020}, abstract = {Comment sections of online news platforms are an essential space to express opinions and discuss political topics. In contrast to other online posts, news discussions are related to particular news articles, comments refer to each other, and individual conversations emerge. 
However, the misuse by spammers, haters, and trolls makes costly content moderation necessary. Sentiment analysis can not only support moderation but also help to understand the dynamics of online discussions. A subtask of content moderation is the identification of toxic comments. To this end, we describe the concept of toxicity and characterize its subclasses. Further, we present various deep learning approaches, including datasets and architectures, tailored to sentiment analysis in online discussions. One way to make these approaches more comprehensible and trustworthy is fine-grained instead of binary comment classification. On the downside, more classes require more training data. Therefore, we propose to augment training data by using transfer learning. We discuss real-world applications, such as semi-automated comment moderation and troll detection. Finally, we outline future challenges and current limitations in light of most recent research publications.}, language = {en} } @article{DombrowskiErmakovaFabian2019, author = {Dombrowski, Sebastian and Ermakova, Tatiana and Fabian, Benjamin}, title = {Graph-based analysis of cloud connectivity at the internet protocol level}, series = {International Journal of Communication Networks and Distributed Systems (IJCNDS)}, volume = {23}, journal = {International Journal of Communication Networks and Distributed Systems (IJCNDS)}, number = {1}, publisher = {Inderscience Enterprises Ltd}, address = {Geneva}, issn = {1754-3916}, doi = {10.1504/IJCNDS.2019.100644}, pages = {117 -- 142}, year = {2019}, abstract = {Internet connectivity of cloud services is of exceptional importance for both their providers and consumers. This article demonstrates the outlines of a method for measuring cloud-service connectivity at the internet protocol level from a client's perspective. For this, we actively collect connectivity data via traceroute measurements from PlanetLab to several major cloud services. 
Furthermore, we construct graph models from the collected data, and analyse the connectivity of the services based on important graph-based measures. Then, random and targeted node removal attacks are simulated, and the corresponding vulnerability of cloud services is evaluated. Our results indicate that cloud service hosts are, on average, much better connected than average hosts. However, when interconnecting nodes are removed in a targeted manner, cloud connectivity is dramatically reduced.}, language = {en} } @article{JiangNaumann2020, author = {Jiang, Lan and Naumann, Felix}, title = {Holistic primary key and foreign key detection}, series = {Journal of intelligent information systems : JIIS}, volume = {54}, journal = {Journal of intelligent information systems : JIIS}, number = {3}, publisher = {Springer}, address = {Dordrecht}, issn = {0925-9902}, doi = {10.1007/s10844-019-00562-z}, pages = {439 -- 461}, year = {2020}, abstract = {Primary keys (PKs) and foreign keys (FKs) are important elements of relational schemata in various applications, such as query optimization and data integration. However, in many cases, these constraints are unknown or not documented. Detecting them manually is time-consuming and even infeasible in large-scale datasets. We study the problem of discovering primary keys and foreign keys automatically and propose an algorithm to detect both, namely Holistic Primary Key and Foreign Key Detection (HoPF). PKs and FKs are subsets of the sets of unique column combinations (UCCs) and inclusion dependencies (INDs), respectively, for which efficient discovery algorithms are known. Using score functions, our approach is able to effectively extract the true PKs and FKs from the vast sets of valid UCCs and INDs. Several pruning rules are employed to speed up the procedure. We evaluate precision and recall on three benchmarks and two real-world datasets. 
The results show that our method is able to retrieve on average 88\% of all primary keys, and 91\% of all foreign keys. We compare the performance of HoPF with two baseline approaches that both assume the existence of primary keys.}, language = {en} } @phdthesis{Koumarelas2020, author = {Koumarelas, Ioannis}, title = {Data preparation and domain-agnostic duplicate detection}, doi = {10.25932/publishup-48913}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-489131}, school = {Universit{\"a}t Potsdam}, pages = {x, 97}, year = {2020}, abstract = {Successfully completing any data science project demands careful consideration across its whole process. Although the focus is often put on later phases of the process, in practice, experts spend more time in earlier phases, preparing data, to make them consistent with the systems' requirements or to improve their models' accuracies. Duplicate detection is typically applied during the data cleaning phase, which is dedicated to removing data inconsistencies and improving the overall quality and usability of data. While data cleaning involves a plethora of approaches to perform specific operations, such as schema alignment and data normalization, the task of detecting and removing duplicate records is particularly challenging. Duplicates arise when multiple records representing the same entities exist in a database. Due to numerous reasons, spanning from simple typographical errors to different schemas and formats of integrated databases. Keeping a database free of duplicates is crucial for most use-cases, as their existence causes false negatives and false positives when matching queries against it. These two data quality issues have negative implications for tasks, such as hotel booking, where users may erroneously select a wrong hotel, or parcel delivery, where a parcel can get delivered to the wrong address. Identifying the variety of possible data issues to eliminate duplicates demands sophisticated approaches. 
While research in duplicate detection is well-established and covers different aspects of both efficiency and effectiveness, our work in this thesis focuses on the latter. We propose novel approaches to improve data quality before duplicate detection takes place and apply the latter in datasets even when prior labeling is not available. Our experiments show that improving data quality upfront can increase duplicate classification results by up to 19\%. To this end, we propose two novel pipelines that select and apply generic as well as address-specific data preparation steps with the purpose of maximizing the success of duplicate detection. Generic data preparation, such as the removal of special characters, can be applied to any relation with alphanumeric attributes. When applied, data preparation steps are selected only for attributes where there are positive effects on pair similarities, which indirectly affect classification, or on classification directly. Our work on addresses is twofold; first, we consider more domain-specific approaches to improve the quality of values, and, second, we experiment with known and modified versions of similarity measures to select the most appropriate per address attribute, e.g., city or country. To facilitate duplicate detection in applications where gold standard annotations are not available and obtaining them is not possible or too expensive, we propose MDedup. MDedup is a novel, rule-based, and fully automatic duplicate detection approach that is based on matching dependencies. These dependencies can be used to detect duplicates and can be discovered using state-of-the-art algorithms efficiently and without any prior labeling. MDedup uses two pipelines to first train on datasets with known labels, learning to identify useful matching dependencies, and then be applied on unseen datasets, regardless of any existing gold standard. 
Finally, our work is accompanied by open source code to enable repeatability of our research results and application of our approaches to other datasets.}, language = {en} } @phdthesis{Lazaridou2021, author = {Lazaridou, Konstantina}, title = {Revealing hidden patterns in political news and social media with machine learning}, doi = {10.25932/publishup-50273}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-502734}, school = {Universit{\"a}t Potsdam}, pages = {xv, 140}, year = {2021}, abstract = {As part of our everyday life we consume breaking news and interpret it based on our own viewpoints and beliefs. We have easy access to online social networking platforms and news media websites, where we inform ourselves about current affairs and often post about our own views, such as in news comments or social media posts. The media ecosystem enables opinions and facts to travel from news sources to news readers, from news article commenters to other readers, from social network users to their followers, etc. The views of the world many of us have depend on the information we receive via online news and social media. Hence, it is essential to maintain accurate, reliable and objective online content to ensure democracy and verity on the Web. To this end, we contribute to a trustworthy media ecosystem by analyzing news and social media in the context of politics to ensure that media serves the public interest. In this thesis, we use text mining, natural language processing and machine learning techniques to reveal underlying patterns in political news articles and political discourse in social networks. Mainstream news sources typically cover a great amount of the same news stories every day, but they often place them in a different context or report them from different perspectives. 
In this thesis, we are interested in how distinct and predictable newspaper journalists are, in the way they report the news, as a means to understand and identify their different political beliefs. To this end, we propose two models that classify text from news articles to their respective original news source, i.e., reported speech and also news comments. Our goal is to capture systematic quoting and commenting patterns by journalists and news commenters respectively, which can lead us to the newspaper where the quotes and comments are originally published. Predicting news sources can help us understand the potential subjective nature behind news storytelling and the magnitude of this phenomenon. Revealing this hidden knowledge can restore our trust in media by advancing transparency and diversity in the news. Media bias can be expressed in various subtle ways in the text and it is often challenging to identify these bias manifestations correctly, even for humans. However, media experts, e.g., journalists, are a powerful resource that can help us overcome the vague definition of political media bias and they can also assist automatic learners to find the hidden bias in the text. Due to the enormous technological advances in artificial intelligence, we hypothesize that identifying political bias in the news could be achieved through the combination of sophisticated deep learning models and domain expertise. Therefore, our second contribution is a high-quality and reliable news dataset annotated by journalists for political bias and a state-of-the-art solution for this task based on curriculum learning. Our aim is to discover whether domain expertise is necessary for this task and to provide an automatic solution for this traditionally manually-solved problem. User generated content is fundamentally different from news articles, e.g., messages are shorter, they are often personal and opinionated, they refer to specific topics and persons, etc. 
Regarding political and socio-economic news, individuals in online communities make use of social networks to keep their peers up-to-date and to share their own views on ongoing affairs. We believe that social media is also an as powerful instrument for information flow as the news sources are, and we use its unique characteristic of rapid news coverage for two applications. We analyze Twitter messages and debate transcripts during live political presidential debates to automatically predict the topics that Twitter users discuss. Our goal is to discover the favoured topics in online communities on the dates of political events as a way to understand the political subjects of public interest. With the up-to-dateness of microblogs, an additional opportunity emerges, namely to use social media posts and leverage the real-time verity about discussed individuals to find their locations. That is, given a person of interest that is mentioned in online discussions, we use the wisdom of the crowd to automatically track her physical locations over time. We evaluate our approach in the context of politics, i.e., we predict the locations of US politicians as a proof of concept for important use cases, such as to track people that are national risks, e.g., warlords and wanted criminals.}, language = {en} } @phdthesis{Pape2021, author = {Pape, Tobias}, title = {Efficient compound values in virtual machines}, doi = {10.25932/publishup-49913}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-499134}, school = {Universit{\"a}t Potsdam}, pages = {xxix, 242}, year = {2021}, abstract = {Compound values are not universally supported in virtual machine (VM)-based programming systems and languages. However, providing data structures with value characteristics can be beneficial. On one hand, programming systems and languages can adequately represent physical quantities with compound values and avoid inconsistencies, for example, in representation of large numbers. 
On the other hand, just-in-time (JIT) compilers, which are often found in VMs, can rely on the fact that compound values are immutable, which is an important property in optimizing programs. Considering this, compound values have an optimization potential that can be put to use by implementing them in VMs in a way that is efficient in memory usage and execution time. Yet, optimized compound values in VMs face certain challenges: to maintain consistency, it should not be observable by the program whether compound values are represented in an optimized way by a VM; an optimization should take into account, that the usage of compound values can exhibit certain patterns at run-time; and that necessary value-incompatible properties due to implementation restrictions should be reduced. We propose a technique to detect and compress common patterns of compound value usage at run-time to improve memory usage and execution speed. Our approach identifies patterns of frequent compound value references and introduces abbreviated forms for them. Thus, it is possible to store multiple inter-referenced compound values in an inlined memory representation, reducing the overhead of metadata and object references. We extend our approach by a notion of limited mutability, using cells that act as barriers for our approach and provide a location for shared, mutable access with the possibility of type specialization. We devise an extension to our approach that allows us to express automatic unboxing of boxed primitive data types in terms of our initial technique. We show that our approach is versatile enough to express another optimization technique that relies on values, such as Booleans, that are unique throughout a programming system. Furthermore, we demonstrate how to re-use learned usage patterns and optimizations across program runs, thus reducing the performance impact of pattern recognition. 
We show in a best-case prototype that the implementation of our approach is feasible and can also be applied to general purpose programming systems, namely implementations of the Racket language and Squeak/Smalltalk. In several micro-benchmarks, we found that our approach can effectively reduce memory consumption and improve execution speed.}, language = {en} } @phdthesis{Loster2021, author = {Loster, Michael}, title = {Knowledge base construction with machine learning methods}, doi = {10.25932/publishup-50145}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-501459}, school = {Universit{\"a}t Potsdam}, pages = {ii, 130}, year = {2021}, abstract = {Modern knowledge bases contain and organize knowledge from many different topic areas. Apart from specific entity information, they also store information about their relationships amongst each other. Combining this information results in a knowledge graph that can be particularly helpful in cases where relationships are of central importance. Among other applications, modern risk assessment in the financial sector can benefit from the inherent network structure of such knowledge graphs by assessing the consequences and risks of certain events, such as corporate insolvencies or fraudulent behavior, based on the underlying network structure. As public knowledge bases often do not contain the necessary information for the analysis of such scenarios, the need arises to create and maintain dedicated domain-specific knowledge bases. This thesis investigates the process of creating domain-specific knowledge bases from structured and unstructured data sources. In particular, it addresses the topics of named entity recognition (NER), duplicate detection, and knowledge validation, which represent essential steps in the construction of knowledge bases. 
As such, we present a novel method for duplicate detection based on a Siamese neural network that is able to learn a dataset-specific similarity measure which is used to identify duplicates. Using the specialized network architecture, we design and implement a knowledge transfer between two deduplication networks, which leads to significant performance improvements and a reduction of required training data. Furthermore, we propose a named entity recognition approach that is able to identify company names by integrating external knowledge in the form of dictionaries into the training process of a conditional random field classifier. In this context, we study the effects of different dictionaries on the performance of the NER classifier. We show that both the inclusion of domain knowledge as well as the generation and use of alias names results in significant performance improvements. For the validation of knowledge represented in a knowledge base, we introduce Colt, a framework for knowledge validation based on the interactive quality assessment of logical rules. In its most expressive implementation, we combine Gaussian processes with neural networks to create Colt-GP, an interactive algorithm for learning rule models. Unlike other approaches, Colt-GP uses knowledge graph embeddings and user feedback to cope with data quality issues of knowledge bases. The learned rule model can be used to conditionally apply a rule and assess its quality. Finally, we present CurEx, a prototypical system for building domain-specific knowledge bases from structured and unstructured data sources. Its modular design is based on scalable technologies, which, in addition to processing large datasets, ensures that the modules can be easily exchanged or extended. CurEx offers multiple user interfaces, each tailored to the individual needs of a specific user group and is fully compatible with the Colt framework, which can be used as part of the system. 
We conduct a wide range of experiments with different datasets to determine the strengths and weaknesses of the proposed methods. To ensure the validity of our results, we compare the proposed methods with competing approaches.}, language = {en} } @misc{GieseHenklerHirsch2017, author = {Giese, Holger and Henkler, Stefan and Hirsch, Martin}, title = {A multi-paradigm approach supporting the modular execution of reconfigurable hybrid systems}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-402896}, pages = {34}, year = {2017}, abstract = {Advanced mechatronic systems have to integrate existing technologies from mechanical, electrical and software engineering. They must be able to adapt their structure and behavior at runtime by reconfiguration to react flexibly to changes in the environment. Therefore, a tight integration of structural and behavioral models of the different domains is required. This integration results in complex reconfigurable hybrid systems, the execution logic of which cannot be addressed directly with existing standard modeling, simulation, and code-generation techniques. We present in this paper how our component-based approach for reconfigurable mechatronic systems, MECHATRONIC UML, efficiently handles the complex interplay of discrete behavior and continuous behavior in a modular manner. In addition, its extension to even more flexible reconfiguration cases is presented.}, language = {en} } @phdthesis{Wolf2021, author = {Wolf, Johannes}, title = {Analysis and visualization of transport infrastructure based on large-scale geospatial mobile mapping data}, doi = {10.25932/publishup-53612}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-536129}, school = {Universit{\"a}t Potsdam}, pages = {vi, 121}, year = {2021}, abstract = {3D point clouds are a universal and discrete digital representation of three-dimensional objects and environments. 
For geospatial applications, 3D point clouds have become a fundamental type of raw data acquired and generated using various methods and techniques. In particular, 3D point clouds serve as raw data for creating digital twins of the built environment. This thesis concentrates on the research and development of concepts, methods, and techniques for preprocessing, semantically enriching, analyzing, and visualizing 3D point clouds for applications around transport infrastructure. It introduces a collection of preprocessing techniques that aim to harmonize raw 3D point cloud data, such as point density reduction and scan profile detection. Metrics such as, e.g., local density, verticality, and planarity are calculated for later use. One of the key contributions tackles the problem of analyzing and deriving semantic information in 3D point clouds. Three different approaches are investigated: a geometric analysis, a machine learning approach operating on synthetically generated 2D images, and a machine learning approach operating on 3D point clouds without intermediate representation. In the first application case, 2D image classification is applied and evaluated for mobile mapping data focusing on road networks to derive road marking vector data. The second application case investigates how 3D point clouds can be merged with ground-penetrating radar data for a combined visualization and to automatically identify atypical areas in the data. For example, the approach detects pavement regions with developing potholes. The third application case explores the combination of a 3D environment based on 3D point clouds with panoramic imagery to improve visual representation and the detection of 3D objects such as traffic signs. The presented methods were implemented and tested based on software frameworks for 3D point clouds and 3D visualization. 
In particular, modules for metric computation, classification procedures, and visualization techniques were integrated into a modular pipeline-based C++ research framework for geospatial data processing, extended by Python machine learning scripts. All visualization and analysis techniques scale to large real-world datasets such as road networks of entire cities or railroad networks. The thesis shows that some use cases allow taking advantage of established image vision methods to analyze images rendered from mobile mapping data efficiently. The two presented semantic classification methods working directly on 3D point clouds are use case independent and show similar overall accuracy when compared to each other. While the geometry-based method requires less computation time, the machine learning-based method supports arbitrary semantic classes but requires training the network with ground truth data. Both methods can be used in combination to gradually build this ground truth with manual corrections via a respective annotation tool. This thesis contributes results for IT system engineering of applications, systems, and services that require spatial digital twins of transport infrastructure such as road networks and railroad networks based on 3D point clouds as raw data. It demonstrates the feasibility of fully automated data flows that map captured 3D point clouds to semantically classified models. 
This provides a key component for seamlessly integrated spatial digital twins in IT solutions that require up-to-date, object-based, and semantically enriched information about the built environment.}, language = {en} } @phdthesis{Repke2022, author = {Repke, Tim}, title = {Machine-learning-assisted corpus exploration and visualisation}, doi = {10.25932/publishup-56263}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-562636}, school = {Universit{\"a}t Potsdam}, pages = {xii, 131}, year = {2022}, abstract = {Text collections, such as corpora of books, research articles, news, or business documents are an important resource for knowledge discovery. Exploring large document collections by hand is a cumbersome but necessary task to gain new insights and find relevant information. Our digitised society allows us to utilise algorithms to support the information seeking process, for example with the help of retrieval or recommender systems. However, these systems only provide selective views of the data and require some prior knowledge to issue meaningful queries and assess a system's response. The advancements of machine learning allow us to reduce this gap and better assist the information seeking process. For example, instead of sighting countless business documents by hand, journalists and investigators can employ natural language processing techniques, such as named entity recognition. Although this greatly improves the capabilities of a data exploration platform, the wealth of information is still overwhelming. An overview of the entirety of a dataset in the form of a two-dimensional map-like visualisation may help to circumvent this issue. Such overviews enable novel interaction paradigms for users, which are similar to the exploration of digital geographical maps. 
In particular, they can provide valuable context by indicating how a piece of information fits into the bigger picture. This thesis proposes algorithms that appropriately pre-process heterogeneous documents and compute the layout for datasets of all kinds. Traditionally, given high-dimensional semantic representations of the data, so-called dimensionality reduction algorithms are used to compute a layout of the data on a two-dimensional canvas. In this thesis, we focus on text corpora and go beyond only projecting the inherent semantic structure itself. Therefore, we propose three dimensionality reduction approaches that incorporate additional information into the layout process: (1) a multi-objective dimensionality reduction algorithm to jointly visualise semantic information with inherent network information derived from the underlying data; (2) a comparison of initialisation strategies for different dimensionality reduction algorithms to generate a series of layouts for corpora that grow and evolve over time; (3) and an algorithm that updates existing layouts by incorporating user feedback provided by pointwise drag-and-drop edits. This thesis also contains system prototypes to demonstrate the proposed technologies, including pre-processing and layout of the data and presentation in interactive user interfaces.}, language = {en} } @phdthesis{Katzmann2023, author = {Katzmann, Maximilian}, title = {About the analysis of algorithms on networks with underlying hyperbolic geometry}, doi = {10.25932/publishup-58296}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-582965}, school = {Universit{\"a}t Potsdam}, pages = {xi, 191}, year = {2023}, abstract = {Many complex systems that we encounter in the world can be formalized using networks. Consequently, they have been in the focus of computer science for decades, where algorithms are developed to understand and utilize these systems. 
Surprisingly, our theoretical understanding of these algorithms and their behavior in practice often diverge significantly. In fact, they tend to perform much better on real-world networks than one would expect when considering the theoretical worst-case bounds. One way of capturing this discrepancy is the average-case analysis, where the idea is to acknowledge the differences between practical and worst-case instances by focusing on networks whose properties match those of real graphs. Recent observations indicate that good representations of real-world networks are obtained by assuming that a network has an underlying hyperbolic geometry. In this thesis, we demonstrate that the connection between networks and hyperbolic space can be utilized as a powerful tool for average-case analysis. To this end, we first introduce strongly hyperbolic unit disk graphs and identify the famous hyperbolic random graph model as a special case of them. We then consider four problems where recent empirical results highlight a gap between theory and practice and use hyperbolic graph models to explain these phenomena theoretically. First, we develop a routing scheme, used to forward information in a network, and analyze its efficiency on strongly hyperbolic unit disk graphs. For the special case of hyperbolic random graphs, our algorithm beats existing performance lower bounds. Afterwards, we use the hyperbolic random graph model to theoretically explain empirical observations about the performance of the bidirectional breadth-first search. Finally, we develop algorithms for computing optimal and nearly optimal vertex covers (problems known to be NP-hard) and show that, on hyperbolic random graphs, they run in polynomial and quasi-linear time, respectively. 
Our theoretical analyses reveal interesting properties of hyperbolic random graphs and our empirical studies present evidence that these properties, as well as our algorithmic improvements translate back into practice.}, language = {en} } @phdthesis{Draisbach2022, author = {Draisbach, Uwe}, title = {Efficient duplicate detection and the impact of transitivity}, doi = {10.25932/publishup-57214}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-572140}, school = {Universit{\"a}t Potsdam}, pages = {x, 150}, year = {2022}, abstract = {Duplicate detection describes the process of finding multiple representations of the same real-world entity in the absence of a unique identifier, and has many application areas, such as customer relationship management, genealogy and social sciences, or online shopping. Due to the increasing amount of data in recent years, the problem has become even more challenging on the one hand, but has led to a renaissance in duplicate detection research on the other hand. This thesis examines the effects and opportunities of transitive relationships on the duplicate detection process. Transitivity implies that if record pairs ⟨ri,rj⟩ and ⟨rj,rk⟩ are classified as duplicates, then also record pair ⟨ri,rk⟩ has to be a duplicate. However, this reasoning might contradict with the pairwise classification, which is usually based on the similarity of objects. An essential property of similarity, in contrast to equivalence, is that similarity is not necessarily transitive. First, we experimentally evaluate the effect of an increasing data volume on the threshold selection to classify whether a record pair is a duplicate or non-duplicate. Our experiments show that independently of the pair selection algorithm and the used similarity measure, selecting a suitable threshold becomes more difficult with an increasing number of records due to an increased probability of adding a false duplicate to an existing cluster. 
Thus, the best threshold changes with the dataset size, and a good threshold for a small (possibly sampled) dataset is not necessarily a good threshold for a larger (possibly complete) dataset. As data grows over time, earlier selected thresholds are no longer a suitable choice, and the problem becomes worse for datasets with larger clusters. Second, we present with the Duplicate Count Strategy (DCS) and its enhancement DCS++ two alternatives to the standard Sorted Neighborhood Method (SNM) for the selection of candidate record pairs. DCS adapts SNM's window size based on the number of detected duplicates and DCS++ uses transitive dependencies to save complex comparisons for finding duplicates in larger clusters. We prove that with a proper (domain- and data-independent!) threshold, DCS++ is more efficient than SNM without loss of effectiveness. Third, we tackle the problem of contradicting pairwise classifications. Usually, the transitive closure is used for pairwise classifications to obtain a transitively closed result set. However, the transitive closure disregards negative classifications. We present three new and several existing clustering algorithms and experimentally evaluate them on various datasets and under various algorithm configurations. The results show that the commonly used transitive closure is inferior to most other clustering algorithms, especially for the precision of results. In scenarios with larger clusters, our proposed EMCC algorithm is, together with Markov Clustering, the best performing clustering approach for duplicate detection, although its runtime is longer than Markov Clustering due to the subexponential time complexity. 
EMCC especially outperforms Markov Clustering regarding the precision of the results and additionally has the advantage that it can also be used in scenarios where edge weights are not available.}, language = {en} } @phdthesis{Roumen2023, author = {Roumen, Thijs}, title = {Portable models for laser cutting}, doi = {10.25932/publishup-57814}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-578141}, school = {Universit{\"a}t Potsdam}, pages = {xx, 170}, year = {2023}, abstract = {Laser cutting is a fast and precise fabrication process. This makes laser cutting a powerful process in custom industrial production. Since the patents on the original technology started to expire, a growing community of tech-enthusiasts embraced the technology and started sharing the models they fabricate online. Surprisingly, the shared models appear to largely be one-offs (e.g., they proudly showcase what a single person can make in one afternoon). For laser cutting to become a relevant mainstream phenomenon (as opposed to the current tech enthusiasts and industry users), it is crucial to enable users to reproduce models made by more experienced modelers, and to build on the work of others instead of creating one-offs. We create a technological basis that allows users to build on the work of others—a progression that is currently held back by the use of exchange formats that disregard mechanical differences between machines and therefore overlook implications with respect to how well parts fit together mechanically (aka engineering fit). For the field to progress, we need a machine-independent sharing infrastructure. In this thesis, we outline three approaches that together get us closer to this: (1) 2D cutting plans that are tolerant to machine variations. Our initial take is a minimally invasive approach: replacing machine-specific elements in cutting plans with more tolerant elements using mechanical hacks like springs and wedges. 
The resulting models fabricate on any consumer laser cutter and in a range of materials. (2) sharing models in 3D. To allow building on the work of others, we build a 3D modeling environment for laser cutting (kyub). After users design a model, they export their 3D models to 2D cutting plans optimized for the machine and material at hand. We extend this volumetric environment with tools to edit individual plates, allowing users to leverage the efficiency of volumetric editing while having control over the most detailed elements in laser-cutting (plates). (3) converting legacy 2D cutting plans to 3D models. To handle legacy models, we build software to interactively reconstruct 3D models from 2D cutting plans. This allows users to reuse the models in more productive ways. We revisit this by automating the assembly process for a large subset of models. The above-mentioned software composes a larger system (kyub, 140,000 lines of code). This system integration enables the push towards actual use, which we demonstrate through a range of workshops where users build complex models such as fully functional guitars. By simplifying sharing and re-use and the resulting increase in model complexity, this line of work forms a small step to enable personal fabrication to scale past the maker phenomenon, towards a mainstream phenomenon—the same way that other fields, such as print (postscript) and ultimately computing itself (portable programming languages, etc.) reached mass adoption.}, language = {en} } @phdthesis{Niephaus2022, author = {Niephaus, Fabio}, title = {Exploratory tool-building platforms for polyglot virtual machines}, doi = {10.25932/publishup-57177}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-571776}, school = {Universit{\"a}t Potsdam}, pages = {xxi, 249}, year = {2022}, abstract = {Polyglot programming allows developers to use multiple programming languages within the same software project. 
While it is common to use more than one language in certain programming domains, developers also apply polyglot programming for other purposes such as to re-use software written in other languages. Although established approaches to polyglot programming come with significant limitations, for example, in terms of performance and tool support, developers still use them to be able to combine languages. Polyglot virtual machines (VMs) such as GraalVM provide a new level of polyglot programming, allowing languages to directly interact with each other. This reduces the amount of glue code needed to combine languages, results in better performance, and enables tools such as debuggers to work across languages. However, only a little research has focused on novel tools that are designed to support developers in building software with polyglot VMs. One reason is that tool-building is often an expensive activity, another one is that polyglot VMs are still a moving target as their use cases and requirements are not yet well understood. In this thesis, we present an approach that builds on existing self-sustaining programming systems such as Squeak/Smalltalk to enable exploratory programming, a practice for exploring and gathering software requirements, and re-use their extensive tool-building capabilities in the context of polyglot VMs. Based on TruffleSqueak, our implementation for the GraalVM, we further present five case studies that demonstrate how our approach helps tool developers to design and build tools for polyglot programming. We further show that TruffleSqueak can also be used by application developers to build and evolve polyglot applications at run-time and by language and runtime developers to understand the dynamic behavior of GraalVM languages and internals. Since our platform allows all these developers to apply polyglot programming, it can further help to better understand the advantages, use cases, requirements, and challenges of polyglot VMs. 
Moreover, we demonstrate that our approach can also be applied to other polyglot VMs and that insights gained through it are transferable to other programming systems. We conclude that our research on tools for polyglot programming is an important step toward making polyglot VMs more approachable for developers in practice. With good tool support, we believe polyglot VMs can make it much more common for developers to take advantage of multiple languages and their ecosystems when building software.}, language = {en} } @misc{LadleifWeske2021, author = {Ladleif, Jan and Weske, Mathias}, title = {Which Event Happened First? Deferred Choice on Blockchain Using Oracles}, series = {Zweitver{\"o}ffentlichungen der Universit{\"a}t Potsdam : Reihe der Digital Engineering Fakult{\"a}t}, volume = {4}, journal = {Zweitver{\"o}ffentlichungen der Universit{\"a}t Potsdam : Reihe der Digital Engineering Fakult{\"a}t}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, doi = {10.25932/publishup-55068}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-550681}, pages = {1 -- 16}, year = {2021}, abstract = {First come, first served: Critical choices between alternative actions are often made based on events external to an organization, and reacting promptly to their occurrence can be a major advantage over the competition. In Business Process Management (BPM), such deferred choices can be expressed in process models, and they are an important aspect of process engines. Blockchain-based process execution approaches are no exception to this, but are severely limited by the inherent properties of the platform: The isolated environment prevents direct access to external entities and data, and the non-continual runtime based entirely on atomic transactions impedes the monitoring and detection of events. In this paper we provide an in-depth examination of the semantics of deferred choice, and transfer them to environments such as the blockchain. 
We introduce and compare several oracle architectures able to satisfy certain requirements, and show that they can be implemented using state-of-the-art blockchain technology.}, language = {en} } @phdthesis{Rothenberger2022, author = {Rothenberger, Ralf}, title = {Satisfiability thresholds for non-uniform random k-SAT}, doi = {10.25932/publishup-54970}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-549702}, school = {Universit{\"a}t Potsdam}, pages = {x, 163}, year = {2022}, abstract = {Boolean Satisfiability (SAT) is one of the problems at the core of theoretical computer science. It was the first problem proven to be NP-complete by Cook and, independently, by Levin. Nowadays it is conjectured that SAT cannot be solved in sub-exponential time. Thus, it is generally assumed that SAT and its restricted version k-SAT are hard to solve. However, state-of-the-art SAT solvers can solve even huge practical instances of these problems in a reasonable amount of time. Why is SAT hard in theory, but easy in practice? One approach to answering this question is investigating the average runtime of SAT. In order to analyze this average runtime the random k-SAT model was introduced. The model generates all k-SAT instances with n variables and m clauses with uniform probability. Researching random k-SAT led to a multitude of insights and tools for analyzing random structures in general. One major observation was the emergence of the so-called satisfiability threshold: A phase transition point in the number of clauses at which the generated formulas go from asymptotically almost surely satisfiable to asymptotically almost surely unsatisfiable. Additionally, instances around the threshold seem to be particularly hard to solve. In this thesis we analyze a more general model of random k-SAT that we call non-uniform random k-SAT. In contrast to the classical model each of the n Boolean variables now has a distinct probability of being drawn. 
For each of the m clauses we draw k variables according to the variable distribution and choose their signs uniformly at random. Non-uniform random k-SAT gives us more control over the distribution of Boolean variables in the resulting formulas. This allows us to tailor distributions to the ones observed in practice. Notably, non-uniform random k-SAT contains the previously proposed models random k-SAT, power-law random k-SAT and geometric random k-SAT as special cases. We analyze the satisfiability threshold in non-uniform random k-SAT depending on the variable probability distribution. Our goal is to derive conditions on this distribution under which an equivalent of the satisfiability threshold conjecture holds. We start with the arguably simpler case of non-uniform random 2-SAT. For this model we show under which conditions a threshold exists, if it is sharp or coarse, and what the leading constant of the threshold function is. These are exactly the three ingredients one needs in order to prove or disprove the satisfiability threshold conjecture. For non-uniform random k-SAT with k=3 we only prove sufficient conditions under which a threshold exists. We also show some properties of the variable probabilities under which the threshold is sharp in this case. These are the first results on the threshold behavior of non-uniform random k-SAT.}, language = {en} } @article{BuchemOkatan2021, author = {Buchem, Ilona and Okatan, Ebru}, title = {Using the Addie Model to Produce MOOCs}, series = {EMOOCs 2021}, volume = {2021}, journal = {EMOOCs 2021}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-512-5}, doi = {10.25932/publishup-51727}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-517274}, pages = {249 -- 258}, year = {2021}, abstract = {MOOCs have been produced using a variety of instructional design approaches and frameworks. 
This paper presents experiences from the instructional approach based on the ADDIE model applied to designing and producing MOOCs in the Erasmus+ strategic partnership on Open Badge Ecosystem for Research Data Management (OBERRED). Specifically, this paper describes the case study of the production of the MOOC "Open Badges for Open Science", delivered on the European MOOC platform EMMA. The key goal of this MOOC is to help learners develop a capacity to use Open Badges in the field of Research Data Management (RDM). To produce the MOOC, the ADDIE model was applied as a generic instructional design model and a systematic approach to the design and development following the five design phases: Analysis, Design, Development, Implementation, Evaluation. This paper outlines the MOOC production including methods, templates and tools used in this process including the interactive micro-content created with H5P in form of Open Educational Resources and digital credentials created with Open Badges and issued to MOOC participants upon successful completion of MOOC levels. The paper also outlines the results from qualitative evaluation, which applied the cognitive walkthrough methodology to elicit user requirements. The paper ends with conclusions about pros and cons of using the ADDIE model in MOOC production and formulates recommendations for further work in this area.}, language = {en} } @article{Khalil2021, author = {Khalil, Mohammad}, title = {Who Are the Students of MOOCs?}, series = {EMOOCs 2021}, volume = {2021}, journal = {EMOOCs 2021}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-512-5}, doi = {10.25932/publishup-51729}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-517298}, pages = {259 -- 269}, year = {2021}, abstract = {Clustering in education is important in identifying groups of objects in order to find linked patterns of correlations in educational datasets. 
As such, MOOCs provide a rich source of educational datasets which enable a wide selection of options to carry out clustering and an opportunity for cohort analyses. In this experience paper, five research studies on clustering in MOOCs are reviewed, drawing out several reasonings, methods, and students' clusters that reflect certain kinds of learning behaviours. The collection of the varied clusters shows that each study identifies and defines clusters according to distinctive engagement patterns. Implications and a summary are provided at the end of the paper.}, language = {en} } @article{CasiraghiSancassaniBrambilla2021, author = {Casiraghi, Daniela and Sancassani, Susanna and Brambilla, Federica}, title = {The Role of MOOCs in the New Educational Scenario}, series = {EMOOCs 2021}, volume = {2021}, journal = {EMOOCs 2021}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-512-5}, doi = {10.25932/publishup-51731}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-517315}, pages = {271 -- 274}, year = {2021}, abstract = {The COVID-19 pandemic emergency has forced a profound reshape of our lives. Our way of working and studying has been disrupted with the result of an acceleration of the shift to the digital world. To properly adapt to this change, we need to outline and implement new urgent strategies and approaches which put learning at the center, supporting workers and students to further develop "future proof" skills. In the last period, universities and educational institutions have demonstrated that they can play an important role in this context, also leveraging on the potential of Massive Open Online Courses (MOOCs) which proved to be an important vehicle of flexibility and adaptation in a general context characterised by several constraints. 
From March 2020 till now, we have witnessed an exponential growth of MOOCs enrollments numbers, with "traditional" students interested in different topics not necessarily integrated to their curricular studies. To support students and faculty development during the spreading of the pandemic, Politecnico di Milano focused on one main dimension: faculty development for a better integration of digital tools and contents in the e-learning experience. The current discussion focuses on how to improve the integration of MOOCs in the in-presence activities to create meaningful learning and teaching experiences, thereby leveraging blended learning approaches to engage both students and external stakeholders to equip them with future job relevance skills.}, language = {en} }

% EMOOCs 2021 contribution, pp. 289-295. The acronym in the title is braced
% so that sentence-casing bibliography styles do not lowercase it.
@article{HenseBernd2021,
  author    = {Hense, Julia and Bernd, Mike},
  title     = {Podcasts, Microcontent \& {MOOCs}},
  series    = {EMOOCs 2021},
  volume    = {2021},
  journal   = {EMOOCs 2021},
  publisher = {Universit{\"a}tsverlag Potsdam},
  address   = {Potsdam},
  isbn      = {978-3-86956-512-5},
  doi       = {10.25932/publishup-51736},
  url       = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-517363},
  pages     = {289 -- 295},
  year      = {2021},
  language  = {en}
}

% EMOOCs 2021 contribution, pp. 275-279. The project name and the acronym in
% the title are braced so that sentence-casing styles keep their capitals.
@article{MihaescuAndoneVasiu2021,
  author    = {Mihaescu, Vlad and Andone, Diana and Vasiu, Radu},
  title     = {{DigiCulture} {MOOC} Courses Piloting with Students},
  series    = {EMOOCs 2021},
  volume    = {2021},
  journal   = {EMOOCs 2021},
  publisher = {Universit{\"a}tsverlag Potsdam},
  address   = {Potsdam},
  isbn      = {978-3-86956-512-5},
  doi       = {10.25932/publishup-51733},
  url       = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-517339},
  pages     = {275 -- 279},
  year      = {2021},
  language  = {en}
}

@article{LangsethJacobsenHaugsbakken2021, author = {Langseth, Inger and Jacobsen, Dan Yngve and Haugsbakken, Halvdan}, title = {MOOCs for Flexible and Lifelong Learning in Higher Education}, series = {EMOOCs 2021}, volume = {2021}, journal = {EMOOCs 2021}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn 
= {978-3-86956-512-5}, doi = {10.25932/publishup-51693}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-516930}, pages = {63 -- 78}, year = {2021}, abstract = {In this paper, we take a closer look at the development of Massive Open Online Courses (MOOC) in Norway. We want to contribute to nuancing the image of a sound and sustainable policy for flexible and lifelong learning at national and institutional levels and point to some critical areas of improvement in higher education institutions (HEI). 10 semistructured qualitative interviews were carried out in the autumn 2020 at ten different HE institutions across Norway. The informants were strategically selected among employees involved in MOOC-technology, MOOCproduction and MOOC-support over a period of time stretching from 2010-2020. A main finding is that academics engaged in MOOCs find that their entrepreneurial ideas and results, to a large extent, are overlooked at higher institutional levels, and that progress is frustratingly slow. So far, there seems to be little common understanding of the MOOC-concept and the disruptive and transformative effect that MOOC-technology may have at HEIs. At national levels, digital strategies, funding and digital infrastructure are mainly provided in governmental silos. We suggest that governmental bodies and institutional stake holders pay more attention to entrepreneurial MOOC-initiatives to develop sustainability in flexible and lifelong learning in HEIs. This involves connecting the generous funding of digital projects to the provision of a national portal and platform for Open Access to education. 
To facilitate sustainable lifelong learning in and across HEIs, more quality control to enhance the legitimacy of MOOC certificates and micro-credentials is also a necessary measure.}, language = {en} } @article{CortiBaudoTurroetal.2021, author = {Corti, Paola and Baudo, Valeria and Turr{\´o}, Carlos and Santos, Ana Moura and Nilsson, Charlotta}, title = {Fostering Women to STEM MOOCs}, series = {EMOOCs 2021}, volume = {2021}, journal = {EMOOCs 2021}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-512-5}, doi = {10.25932/publishup-51714}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-517141}, pages = {129 -- 141}, year = {2021}, abstract = {In the context of the Fostering Women to STEM MOOCs (FOSTWOM) project, we present here the general ideas of a gender balance Toolkit, i.e. a collection of recommendations and resources for instructional designers, visual designers, and teaching staff to apply while designing and preparing storyboards for MOOCs and their visual components, so that future STEM online courses have a greater chance to be more inclusive and gender-balanced. Overall, The FOSTWOM project intends to use the inclusive potential of Massive Open Online Courses to propose STEM subjects free of stereotyping assumptions on gender abilities. Moreover, the consortium is interested in attracting girls and young women to science and technology careers, through accessible online content, which can include role models' interviews, relevant real-world situations, and strong conceptual frameworks.}, language = {en} }