@article{ReinRamsonLinckeetal.2017, author = {Rein, Patrick and Ramson, Stefan and Lincke, Jens and Felgentreff, Tim and Hirschfeld, Robert}, title = {Group-Based Behavior Adaptation Mechanisms in Object-Oriented Systems}, series = {IEEE software}, volume = {34}, journal = {IEEE software}, number = {6}, publisher = {Inst. of Electr. and Electronics Engineers}, address = {Los Alamitos}, issn = {0740-7459}, doi = {10.1109/MS.2017.4121224}, pages = {78 -- 82}, year = {2017}, abstract = {Dynamic and distributed systems require behavior adaptations for groups of objects. Group-based behavior adaptation mechanisms scope adaptations to objects matching conditions beyond class membership. The specification of groups can be explicit or implicit.}, language = {en} } @article{BleifussBornemannJohnsonetal.2018, author = {Bleifuss, Tobias and Bornemann, Leon and Johnson, Theodore and Kalashnikov, Dmitri and Naumann, Felix and Srivastava, Divesh}, title = {Exploring Change}, series = {Proceedings of the VLDB Endowment}, volume = {12}, journal = {Proceedings of the VLDB Endowment}, number = {2}, publisher = {Association for Computing Machinery}, address = {New York}, issn = {2150-8097}, doi = {10.14778/3282495.3282496}, pages = {85 -- 98}, year = {2018}, abstract = {Data and metadata in datasets experience many different kinds of change. Values are inserted, deleted or updated; rows appear and disappear; columns are added or repurposed, etc. In such a dynamic situation, users might have many questions related to changes in the dataset, for instance which parts of the data are trustworthy and which are not? Users will wonder: How many changes have there been in the recent minutes, days or years? What kind of changes were made at which points of time? How dirty is the data? Is data cleansing required? 
The fact that data changed can hint at different hidden processes or agendas: a frequently crowd-updated city name may be controversial; a person whose name has been recently changed may be the target of vandalism; and so on. We show various use cases that benefit from recognizing and exploring such change. We envision a system and methods to interactively explore such change, addressing the variability dimension of big data challenges. To this end, we propose a model to capture change and the process of exploring dynamic data to identify salient changes. We provide exploration primitives along with motivational examples and measures for the volatility of data. We identify technical challenges that need to be addressed to make our vision a reality, and propose directions of future work for the data management community.}, language = {en} } @article{MaximovaGieseKrause2018, author = {Maximova, Maria and Giese, Holger and Krause, Christian}, title = {Probabilistic timed graph transformation systems}, series = {Journal of Logical and Algebraic Methods in Programming}, volume = {101}, journal = {Journal of Logical and Algebraic Methods in Programming}, publisher = {Elsevier}, address = {New York}, issn = {2352-2208}, doi = {10.1016/j.jlamp.2018.09.003}, pages = {110 -- 131}, year = {2018}, abstract = {Today, software has become an intrinsic part of complex distributed embedded real-time systems. The next generation of embedded real-time systems will interconnect the today unconnected systems via complex software parts and the service-oriented paradigm. Due to these interconnections, the architecture of systems can be subject to changes at run-time, e.g. when dynamic binding of service end-points is employed or complex collaborations are established dynamically. However, suitable formalisms and techniques that allow for modeling and analysis of timed and probabilistic behavior of such systems as well as of their structure dynamics do not exist so far. 
To fill the identified gap, we propose Probabilistic Timed Graph Transformation Systems (PTGTSs) as a high-level description language that supports all the necessary aspects of structure dynamics, timed behavior, and probabilistic behavior. We introduce the formal model of PTGTSs in this paper as well as present and formally verify a mapping of models with finite state spaces to probabilistic timed automata (PTA) that allows to use the PRISM model checker to analyze PTGTS models with respect to PTCTL properties. (C) 2018 Elsevier Inc. All rights reserved.}, language = {en} } @comment{The citation key below keeps the legacy spelling Kawarnura so that existing citations still resolve; the author field carries the corrected surname Kawamura.} @article{HirschfeldKawarnura2006, author = {Hirschfeld, Robert and Kawamura, Katsuya}, title = {Dynamic service adaptation}, series = {Software : practice \& experience}, volume = {36}, journal = {Software : practice \& experience}, number = {11-12}, publisher = {Wiley}, address = {Chichester}, issn = {0038-0644}, doi = {10.1002/spe.766}, pages = {1115 -- 1131}, year = {2006}, abstract = {Change can be observed in our environment and in the technology we build. While changes in the environment happen continuously and implicitly, our technology has to be kept in sync with the changing world around it. Although we can prepare for some of the changes for most of them we cannot. This is especially true for next-generation mobile communication systems that are expected to support the creation of a ubiquitous society where virtually everything is connected and made available within an organic information network. Resources will frequently join or leave the network, new types of media or new combinations of existing types will be used to interact and cooperate, and services will be tailored to preferences and needs of individual customers to better meet their needs. This paper outlines our research in the area of dynamic service adaptation to provide concepts and technologies allowing for such environments. 
Copyright (C) 2006 John Wiley \& Sons, Ltd.}, language = {en} } @article{DyckGieseLambers2019, author = {Dyck, Johannes and Giese, Holger and Lambers, Leen}, title = {Automatic verification of behavior preservation at the transformation level for relational model transformation}, series = {Software and systems modeling}, volume = {18}, journal = {Software and systems modeling}, number = {5}, publisher = {Springer}, address = {Heidelberg}, issn = {1619-1366}, doi = {10.1007/s10270-018-00706-9}, pages = {2937 -- 2972}, year = {2019}, abstract = {The correctness of model transformations is a crucial element for model-driven engineering of high-quality software. In particular, behavior preservation is an important correctness property avoiding the introduction of semantic errors during the model-driven engineering process. Behavior preservation verification techniques show some kind of behavioral equivalence or refinement between source and target model of the transformation. Automatic tool support is available for verifying behavior preservation at the instance level, i.e., for a given source and target model specified by the model transformation. However, until now there is no sound and automatic verification approach available at the transformation level, i.e., for all source and target models. In this article, we extend our results presented in earlier work (Giese and Lambers, in: Ehrig et al (eds) Graph transformations, Springer, Berlin, 2012) and outline a new transformation-level approach for the sound and automatic verification of behavior preservation captured by bisimulation resp. simulation for outplace model transformations specified by triple graph grammars and semantic definitions given by graph transformation rules. 
In particular, we first show how behavior preservation can be modeled in a symbolic manner at the transformation level and then describe that transformation-level verification of behavior preservation can be reduced to invariant checking of suitable conditions for graph transformations. We demonstrate that the resulting checking problem can be addressed by our own invariant checker for an example of a transformation between sequence charts and communicating automata.}, language = {en} } @misc{HoelzleBjoerkVisscher2019, author = {H{\"o}lzle, Katharina and Bj{\"o}rk, Jennie and Visscher, Klaasjan}, title = {Editorial}, series = {Creativity and innovation management}, volume = {28}, journal = {Creativity and innovation management}, number = {1}, publisher = {Wiley}, address = {Hoboken}, issn = {0963-1690}, doi = {10.1111/caim.12307}, pages = {3 -- 4}, year = {2019}, abstract = {The new year starts and many of us have right away been burdened with conference datelines, grant proposal datelines, teaching obligations, paper revisions and many other things. While being more or less successful in fulfilling To-Do lists and ticking of urgent (and sometimes even important) things, we often feel that our ability to be truly creative or innovative is rather restrained by this (external pressure). With this, we are not alone. Many studies have shown that stress does influence overall work performance and satisfaction. Furthermore, more and more students and entry-levels look for work-life balance and search for employers that offer a surrounding and organization considering these needs. High-Tech and start-up companies praise themselves for their "Feel-Good managers" or Yoga programs. But is this really helpful? Is there indeed a relationship between stress, adverse work environment and creativity or innovation? What are the supporting factors in a work environment that lets employees be more creative? 
What kind of leadership do we need for innovative behaviour and to what extent can an organization create support structures that reduce the stress we feel? The first issue of Creativity and Innovation Management in 2019 gives some first answers to these questions and hopefully some food for thought. The first paper written by Dirk De Clercq, and Imanol Belausteguigoitia starts with the question which impact work overload has on creative behaviour. The authors look at how employees' perceptions of work overload reduces their creative behaviour. While they find empirical proof for this relationship, they can also show that the effect is weaker with higher levels of passion for work, emotion sharing, and organizational commitment. The buffering effects of emotion sharing and organizational commitment are particularly strong when they are combined with high levels of passion for work. Their findings give first empirical proof that organizations can and should take an active role in helping their employees reducing the effects of adverse work conditions in order to become or stay creative. However, not only work overload is harming creative behaviour, also the fear of losing one's job has detrimental effects on innovative work behaviour. Anahi van Hootegem, Wendy Niesen and Hans de Witte verify that stress and adverse environmental conditions shape our perception of work. Using threat rigidity theory and an empirical study of 394 employees, they show that the threat of job loss impairs employees' innovativeness through increased irritation and decreased concentration. Organizations can help their employees coping better with this insecurity by communicating more openly and providing different support structures. Support often comes from leadership and the support of the supervisor can clearly shape an employee's motivation to show creative behaviour. Wenjing Cai, Evgenia Lysova, Bart A. G. Bossink, Svetlana N. 
Khapova and Weidong Wang report empirical findings from a large-scale survey in China where they find that supervisor support for creativity and job characteristics effectively activate individual psychological capital associated with employee creativity. On a slight different notion, Gisela B{\"a}cklander looks at agile practices in a very well-known High Tech firm. In "Doing Complexity Leadership Theory: How agile coaches at Spotify practice enabling leadership", she researches the role of agile coaches and how they practice enabling leadership, a key balancing force in complexity leadership. She finds that the active involvement of coaches in observing group dynamics, surfacing conflict and facilitating and encouraging constructive dialogue leads to a positive working environment and the well-being of employees. Quotes from the interviews suggest that the flexible structure provided by the coaches may prove a fruitful way to navigate and balance autonomy and alignment in organizations. The fifth paper of Frederik Anseel, Michael Vandamme, Wouter Duyck and Eric Rietzchel goes a little further down this road and researches how groups can be motivated better to select truly creative ideas. We know from former studies that groups often perform rather poorly when it comes to selecting creative ideas for implementation. The authors find in an extensive field experiment that under conditions of high epistemic motivation, proself motivated groups select significantly more creative and original ideas than prosocial groups. They conclude however, that more research is needed to understand better why these differences occur. The prosocial behaviour of groups is also the theme of Karin Moser, Jeremy F. Dawson and Michael A. West's paper on "Antecedents of team innovation in health care teams". They look at team-level motivation and how a prosocial team environment, indicated by the level of helping behaviour and information-sharing, may foster innovation. 
Their results support the hypotheses of both information-sharing and helping behaviour on team innovation. They suggest that both factors may actually act as buffer against constraints in team work, such as large team size or high occupational diversity in cross-functional health care teams, and potentially turn these into resources supporting team innovation rather than acting as barriers. Away from teams and onto designing favourable work environments, the seventh paper of Ferney Osorio, Laurent Dupont, Mauricio Camargo, Pedro Palominos, Jose Ismael Pena and Miguel Alfaro looks into innovation laboratories. Although several studies have tackled the problem of design, development and sustainability of these spaces for innovation, there is still a gap in understanding how the capabilities and performance of these environments are affected by the strategic intentions at the early stages of their design and functioning. The authors analyse and compare eight existing frameworks from literature and propose a new framework for researchers and practitioners aiming to assess or to adapt innovation laboratories. They test their framework in an exploratory study with fifteen laboratories from five different countries and give recommendations for the future design of these laboratories. From design to design thinking goes our last paper from Rama Krishna Reddy Kummitha on "Design Thinking in Social Organisations: Understanding the role of user engagement" where she studies how users persuade social organisations to adopt design thinking. Looking at four social organisations in India during 2008 to 2013, she finds that the designer roles are blurred when social organisations adopt design thinking, while users in the form of interconnecting agencies reduce the gap between designers and communities. The last two articles were developed from papers presented at the 17th International CINet conference organized in Turin in 2016 by Paolo Neirotti and his colleagues. 
In the first article, F{\'a}bio Gama, Johan Frishammar and Vinit Parida focus on ideation and open innovation in small- and medium-sized enterprises. They investigate the relationship between systematic idea generation and performance and the moderating role of market-based partnerships. Based on a survey among manufacturing SMEs, they conclude that higher levels of performance are reached and that collaboration with customers and suppliers pays off most when idea generation is done in a highly systematic way. The second article, by Anna Holmquist, Mats Magnusson and Mona Livholts, resonates the theme of the CINet conference 'Innovation and Tradition; combining the old and the new'. They explore how tradition is used in craft-based design practices to create new meaning. Applying a narrative 'research through design' approach they uncover important design elements, and tensions between them. Please enjoy this first issue of CIM in 2019 and we wish you creativity and innovation without too much stress in the months to come.}, language = {en} } @misc{BjoerkHoelzle2018, author = {Bj{\"o}rk, Jennie and H{\"o}lzle, Katharina}, title = {Editorial}, series = {Creativity and innovation management}, volume = {27}, journal = {Creativity and innovation management}, number = {4}, publisher = {Wiley}, address = {Hoboken}, issn = {0963-1690}, doi = {10.1111/caim.12298}, pages = {373 -- 374}, year = {2018}, abstract = {"Never doubt that a small group of thoughtful, committed citizens can change the world; indeed, it's the only thing that ever has. - Margaret Mead." With the last issue of this year we want to point out directions towards what will come and what challenges and opportunities lie ahead of us. More needed than ever are joint creative efforts to find ways to collaborate and innovate in order to secure the wellbeing of our earth for the next generation to come. 
We have found ourselves puzzled that we could assemble a sustainability issue without having a call for papers or a special issue. In fact, many of the submissions we currently receive, deal with sustainable, ecological or novel approaches to management and organizations. As creativity and innovation are undisputable necessary ingredients for reaching the sustainable development goals, empirical proof and research in this area are still in their infancy. While the role of design and design thinking has been highlighted before for solving wicked societal problems, a lot more research is needed which creative and innovative ways organisations and societies can take to find solutions to climate change, poverty, hunger and education. We would therefore like to call to you, our readers and writers to tackle these problems with your research. The first article in this issue addresses one of the above named challenges - the role of innovation for achieving the transition to a low-carbon energy world. In "Innovating for low-carbon energy through hydropower: Enabling a conservation charity's transition to a low-carbon community", the authors John Gallagher, Paul Coughlan, A. Prysor Williams and Aonghus McNabola look at how an eco-design approach has supported a community transition to low-carbon. They highlight the importance of effective management as well as external collaboration and how the key for success lay in fostering an open environment for creativity and idea sharing. The second article addresses another of the grand challenges, the future of mobility and uses a design-driven approach to develop scenarios for mobility in cities. In "Designing radical innovations of meanings for society: envisioning new scenarios for smart mobility", the authors Claudio Dell'Era, Naiara Altuna and Roberto Verganti investigate how new meanings can be designed and proposed to society rather than to individuals in the particular context of smart mobility. 
Through two case studies the authors argue for a multi-level perspective, taking the perspective of the society to solve societal challenges while considering the needs of the individual. The latter is needed because we will not change if our needs are not addressed. Furthermore, the authors find that both, meaning and technology need to be considered to create radical innovation for society. The role of meaning continues in the third article in this issue. The authors Marta Gasparin and William Green show in their article "Reconstructing meaning without redesigning products: The case of the Serie7 chair" how meaning changes over time even though the product remains the same. Through an in-depth retrospective study of the Serie 7 chair the authors investigate the relationship between meaning and the materiality of the object, and show the importance of materiality in constructing product meaning over long periods. Translating this meaning over the course of the innovation process is an important task of management in order to gain buy-in from all involved stakeholders. In the following article "A systematic approach for new technology development by using a biomimicry-based TRIZ contradiction matrix" the authors Byungun Yoon, Chaeguk Lim, Inchae Park and Dooseob Yoon develop a systematic process combining biomimicry and technology-based TRIZ in order to solve technological problems or develop new technologies based on completely new sources or combinations from technology and biology. In the fifth article in this issue "Innovating via Building Absorptive Capacity: Interactive Effects of Top Management Support of Learning, Employee Learning Orientation, and Decentralization Structure" the authors Li-Yun Sun, Chenwei Li and Yuntao Dong examine the effect of learning-related personal and contextual factors on organizational absorptive capability and subsequent innovative performance. 
The authors find positive effects as well as a moderation influence of decentralized organizational decision-making structures. In the sixth article "Creativity within boundaries: social identity and the development of new ideas in franchise systems" the authors Fanny Simon, Catherine Allix-Desfautaux, Nabil Khelil and Anne-Laure Le Nadant address the paradox of balancing novelty and conformity for creativity in a franchise system. This research is one of the first we know to explicitly address creativity and innovation in such a rigid and pre-determined system. Using a social identity perspective, they can show that social control, which may be exerted by manipulating group identity, is an efficient lever to increase both the creation and the diffusion of the idea. Furthermore, they show that franchisees who do not conform to the norm of the group are stigmatized and must face pressure from the group to adapt their behaviors. This has important implications for future research. In the following article "Exploring employee interactions and quality of contributions in intra-organisational innovation platforms" the authors Dimitra Chasanidou, Nj{\aa}l Sivertstol and Jarle Hildrum examine the user interactions in an intra-organisational innovation platform, and also address the influence of user interactions for idea development. The authors find that employees communicate through the innovation platform with different interaction, contribution and collaboration types and propose three types of contribution qualities—passive, efficient and balanced contribution. In the eighth article "Ready for Take-off": How Open Innovation influences startup success" Cristina Marullo, Elena Casprini, Alberto di Minin and Andrea Piccaluga seek to predict new venture success based on factors that can be observed in the pre-startup phase. The authors introduce different variables of founding teams and how these relate to startup success. 
Building on large-scale dataset of submitted business plans at UC Berkeley, they can show that teams with high skills diversity and past joint experience are a lot better able to prevent the risk of business failure at entry and to adapt the internal resources to market conditions. Furthermore, it is crucial for the team to integrate many external knowledge sources into their process (openness) in order to be successful. The crucial role of knowledge and how it is communicated and shared is the focal point of Natalya Sergeeva's and Anna Trifilova's article on "The role of storytelling in the innovation process". They authors can show how storytelling has an important role to play when it comes to motivating employees to innovate and promoting innovation success stories inside and outside the organization. The deep human desire to hear and experience stories is also addressed in the last article in this issue "Gamification Approaches to the Early Stage of Innovation" by Rui Patricio, Antonio Moreira and Francesco Zurlo. Using gamification approaches at the early stage of innovation promises to create better team coherence, let employees experience fun and engagement, improve communication and foster knowledge exchange. Using an analytical framework, the authors analyze 15 articles that have looked at gamification in the context of innovation management before. They find that gamification indeed supports firms in becoming better at performing complex innovation tasks and managing innovation challenges. 
Furthermore, gamification in innovation creates a space for inspiration, improves creativity and the generation of high potential ideas.}, language = {en} } @article{HeckerSteckhanEybenetal.2022, author = {Hecker, Pascal and Steckhan, Nico and Eyben, Florian and Schuller, Bj{\"o}rn Wolfgang and Arnrich, Bert}, title = {Voice Analysis for Neurological Disorder Recognition - A Systematic Review and Perspective on Emerging Trends}, series = {Frontiers in Digital Health}, journal = {Frontiers in Digital Health}, publisher = {Frontiers Media SA}, address = {Lausanne, Schweiz}, issn = {2673-253X}, doi = {10.3389/fdgth.2022.842301}, pages = {16}, year = {2022}, abstract = {Quantifying neurological disorders from voice is a rapidly growing field of research and holds promise for unobtrusive and large-scale disorder monitoring. The data recording setup and data analysis pipelines are both crucial aspects to effectively obtain relevant information from participants. Therefore, we performed a systematic review to provide a high-level overview of practices across various neurological disorders and highlight emerging trends. PRISMA-based literature searches were conducted through PubMed, Web of Science, and IEEE Xplore to identify publications in which original (i.e., newly recorded) datasets were collected. Disorders of interest were psychiatric as well as neurodegenerative disorders, such as bipolar disorder, depression, and stress, as well as amyotrophic lateral sclerosis, Alzheimer's, and Parkinson's disease, and speech impairments (aphasia, dysarthria, and dysphonia). Of the 43 retrieved studies, Parkinson's disease is represented most prominently with 19 discovered datasets. Free speech and read speech tasks are most commonly used across disorders. Besides popular feature extraction toolkits, many studies utilise custom-built feature sets. Correlations of acoustic features with psychiatric and neurodegenerative disorders are presented. 
In terms of analysis, statistical analysis for significance of individual features is commonly used, as well as predictive modeling approaches, especially with support vector machines and a small number of artificial neural networks. An emerging trend and recommendation for future studies is to collect data in everyday life to facilitate longitudinal data collection and to capture the behavior of participants more naturally. Another emerging trend is to record additional modalities to voice, which can potentially increase analytical performance.}, language = {en} } @article{ZieglerPfitznerSchulzetal.2022, author = {Ziegler, Joceline and Pfitzner, Bjarne and Schulz, Heinrich and Saalbach, Axel and Arnrich, Bert}, title = {Defending against Reconstruction Attacks through Differentially Private Federated Learning for Classification of Heterogeneous Chest X-ray Data}, series = {Sensors}, volume = {22}, journal = {Sensors}, number = {14}, publisher = {MDPI}, address = {Basel, Schweiz}, issn = {1424-8220}, doi = {10.3390/s22145195}, pages = {25}, year = {2022}, abstract = {Privacy regulations and the physical distribution of heterogeneous data are often primary concerns for the development of deep learning models in a medical context. This paper evaluates the feasibility of differentially private federated learning for chest X-ray classification as a defense against data privacy attacks. To the best of our knowledge, we are the first to directly compare the impact of differentially private training on two different neural network architectures, DenseNet121 and ResNet50. Extending the federated learning environments previously analyzed in terms of privacy, we simulated a heterogeneous and imbalanced federated setting by distributing images from the public CheXpert and Mendeley chest X-ray datasets unevenly among 36 clients. 
Both non-private baseline models achieved an area under the receiver operating characteristic curve (AUC) of 0.94 on the binary classification task of detecting the presence of a medical finding. We demonstrate that both model architectures are vulnerable to privacy violation by applying image reconstruction attacks to local model updates from individual clients. The attack was particularly successful during later training stages. To mitigate the risk of a privacy breach, we integrated R{\'e}nyi differential privacy with a Gaussian noise mechanism into local model training. We evaluate model performance and attack vulnerability for privacy budgets $\varepsilon \in \{1, 3, 6, 10\}$. The DenseNet121 achieved the best utility-privacy trade-off with an AUC of 0.94 for $\varepsilon = 6$. Model performance deteriorated slightly for individual clients compared to the non-private baseline. The ResNet50 only reached an AUC of 0.76 in the same privacy setting. Its performance was inferior to that of the DenseNet121 for all considered privacy constraints, suggesting that the DenseNet121 architecture is more robust to differentially private training.}, language = {en} } @article{OrejasPinoNavarroetal.2018, author = {Orejas, Fernando and Pino, Elvira and Navarro, Marisa and Lambers, Leen}, title = {Institutions for navigational logics for graphical structures}, series = {Theoretical computer science}, volume = {741}, journal = {Theoretical computer science}, publisher = {Elsevier}, address = {Amsterdam}, issn = {0304-3975}, doi = {10.1016/j.tcs.2018.02.031}, pages = {19 -- 24}, year = {2018}, abstract = {We show that a Navigational Logic, i.e., a logic to express properties about graphs and about paths in graphs is a semi-exact institution. In this way, we can use a number of operations to structure and modularize our specifications. 
Moreover, using the properties of our institution, we also show how to structure single formulas, which in our formalism could be quite complex.}, language = {en} } @article{RichlyBrauerSchlosser2020, author = {Richly, Keven and Brauer, Janos and Schlosser, Rainer}, title = {Predicting location probabilities of drivers to improve dispatch decisions of transportation network companies based on trajectory data}, series = {Proceedings of the 9th International Conference on Operations Research and Enterprise Systems - ICORES}, journal = {Proceedings of the 9th International Conference on Operations Research and Enterprise Systems - ICORES}, publisher = {Springer}, address = {Berlin}, pages = {12}, year = {2020}, abstract = {The demand for peer-to-peer ridesharing services increased over the last years rapidly. To cost-efficiently dispatch orders and communicate accurate pick-up times is challenging as the current location of each available driver is not exactly known since observed locations can be outdated for several seconds. The developed trajectory visualization tool enables transportation network companies to analyze dispatch processes and determine the causes of unexpected delays. As dispatching algorithms are based on the accuracy of arrival time predictions, we account for factors like noise, sample rate, technical and economic limitations as well as the duration of the entire process as they have an impact on the accuracy of spatio-temporal data. To improve dispatching strategies, we propose a prediction approach that provides a probability distribution for a driver's future locations based on patterns observed in past trajectories. 
We demonstrate the capabilities of our prediction results to (i) avoid critical delays, (ii) to estimate waiting times with higher confidence, and (iii) to enable risk considerations in dispatching strategies.}, language = {en} } @article{SoechtingTrapp2020, author = {S{\"o}chting, Maximilian and Trapp, Matthias}, title = {Controlling image-stylization techniques using eye tracking}, series = {Science and Technology Publications}, journal = {Science and Technology Publications}, publisher = {Springer}, address = {Berlin}, issn = {2184-4321}, pages = {10}, year = {2020}, abstract = {With the spread of smart phones capable of taking high-resolution photos and the development of high-speed mobile data infrastructure, digital visual media is becoming one of the most important forms of modern communication. With this development, however, also comes a devaluation of images as a media form with the focus becoming the frequency at which visual content is generated instead of the quality of the content. In this work, an interactive system using image-abstraction techniques and an eye tracking sensor is presented, which allows users to experience diverting and dynamic artworks that react to their eye movement. The underlying modular architecture enables a variety of different interaction techniques that share common design principles, making the interface as intuitive as possible. The resulting experience allows users to experience a game-like interaction in which they aim for a reward, the artwork, while being held under constraints, e.g., not blinking. 
The conscious eye movements that are required by some interaction techniques hint an interesting, possible future extension for this work into the field of relaxation exercises and concentration training.}, language = {en} } @article{BertiEquilleHarmouchNaumannetal.2018, author = {Berti-Equille, Laure and Harmouch, Nazar and Naumann, Felix and Novelli, Noel and Saravanan, Thirumuruganathan}, title = {Discovery of genuine functional dependencies from relational data with missing values}, series = {Proceedings of the VLDB Endowment}, volume = {11}, journal = {Proceedings of the VLDB Endowment}, number = {8}, publisher = {Association for Computing Machinery}, address = {New York}, issn = {2150-8097}, doi = {10.14778/3204028.3204032}, pages = {880 -- 892}, year = {2018}, abstract = {Functional dependencies (FDs) play an important role in maintaining data quality. They can be used to enforce data consistency and to guide repairs over a database. In this work, we investigate the problem of missing values and its impact on FD discovery. When using existing FD discovery algorithms, some genuine FDs could not be detected precisely due to missing values or some non-genuine FDs can be discovered even though they are caused by missing values with a certain NULL semantics. We define a notion of genuineness and propose algorithms to compute the genuineness score of a discovered FD. This can be used to identify the genuine FDs among the set of all valid dependencies that hold on the data. We evaluate the quality of our method over various real-world and semi-synthetic datasets with extensive experiments. The results show that our method performs well for relatively large FD sets and is able to accurately capture genuine FDs.}, language = {en} } @misc{HorowitzFeiRamosetal.2018, author = {Horowitz, Carol R. and Fei, Kezhen and Ramos, Michelle A. and Hauser, Diane and Ellis, Stephen B. 
and Calman, Neil and B{\"o}ttinger, Erwin}, title = {Receipt of genetic risk information significantly improves blood pressure control among African ancestry adults with hypertension}, series = {Journal of General Internal Medicine}, volume = {33}, journal = {Journal of General Internal Medicine}, publisher = {Springer}, address = {New York}, issn = {0884-8734}, doi = {10.1007/s11606-018-4413-y}, pages = {S322 -- S323}, year = {2018}, language = {en} } @misc{AlvianoRomeroDavilaSchaub2018, author = {Alviano, Mario and Romero Davila, Javier and Schaub, Torsten H.}, title = {Preference Relations by Approximation}, series = {Sixteenth International Conference on Principles of Knowledge Representation and Reasoning}, journal = {Sixteenth International Conference on Principles of Knowledge Representation and Reasoning}, publisher = {AAAI Conference on Artificial Intelligence}, address = {Palo Alto}, pages = {2 -- 11}, year = {2018}, abstract = {Declarative languages for knowledge representation and reasoning provide constructs to define preference relations over the set of possible interpretations, so that preferred models represent optimal solutions of the encoded problem. We introduce the notion of approximation for replacing preference relations with stronger preference relations, that is, relations comparing more pairs of interpretations. Our aim is to accelerate the computation of a non-empty subset of the optimal solutions by means of highly specialized algorithms. We implement our approach in Answer Set Programming (ASP), where problems involving quantitative and qualitative preference relations can be addressed by ASPRIN, implementing a generic optimization algorithm. Unlike this, chains of approximations allow us to reduce several preference relations to the preference relations associated with ASP's native weak constraints and heuristic directives. 
In this way, ASPRIN can now take advantage of several highly optimized algorithms implemented by ASP solvers for computing optimal solutions}, language = {en} } @misc{RischKrestel2018, author = {Risch, Julian and Krestel, Ralf}, title = {My Approach = Your Apparatus?}, series = {Libraries}, journal = {Libraries}, publisher = {Association for Computing Machinery}, address = {New York}, isbn = {978-1-4503-5178-2}, issn = {2575-7865}, doi = {10.1145/3197026.3197038}, pages = {283 -- 292}, year = {2018}, abstract = {Comparative text mining extends from genre analysis and political bias detection to the revelation of cultural and geographic differences, through to the search for prior art across patents and scientific papers. These applications use cross-collection topic modeling for the exploration, clustering, and comparison of large sets of documents, such as digital libraries. However, topic modeling on documents from different collections is challenging because of domain-specific vocabulary. We present a cross-collection topic model combined with automatic domain term extraction and phrase segmentation. This model distinguishes collection-specific and collection-independent words based on information entropy and reveals commonalities and differences of multiple text collections. We evaluate our model on patents, scientific papers, newspaper articles, forum posts, and Wikipedia articles. In comparison to state-of-the-art cross-collection topic modeling, our model achieves up to 13\% higher topic coherence, up to 4\% lower perplexity, and up to 31\% higher document classification accuracy. 
More importantly, our approach is the first topic model that ensures disjunct general and specific word distributions, resulting in clear-cut topic representations.}, language = {en} } @phdthesis{Nikaj2019, author = {Nikaj, Adriatik}, title = {Restful choreographies}, doi = {10.25932/publishup-43890}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-438903}, school = {Universit{\"a}t Potsdam}, pages = {xix, 146}, year = {2019}, abstract = {Business process management has become a key instrument to organize work as many companies represent their operations in business process models. Recently, business process choreography diagrams have been introduced as part of the Business Process Model and Notation standard to represent interactions between business processes, run by different partners. When it comes to the interactions between services on the Web, Representational State Transfer (REST) is one of the primary architectural styles employed by web services today. Ideally, the RESTful interactions between participants should implement the interactions defined at the business choreography level. The problem, however, is the conceptual gap between the business process choreography diagrams and RESTful interactions. Choreography diagrams, on the one hand, are modeled from business domain experts with the purpose of capturing, communicating and, ideally, driving the business interactions. RESTful interactions, on the other hand, depend on RESTful interfaces that are designed by web engineers with the purpose of facilitating the interaction between participants on the internet. In most cases however, business domain experts are unaware of the technology behind web service interfaces and web engineers tend to overlook the overall business goals of web services. While there is considerable work on using process models during process implementation, there is little work on using choreography models to implement interactions between business processes. 
This thesis addresses this research gap by raising the following research question: How to close the conceptual gap between business process choreographies and RESTful interactions? This thesis offers several research contributions that jointly answer the research question. The main research contribution is the design of a language that captures RESTful interactions between participants---RESTful choreography modeling language. Formal completeness properties (with respect to REST) are introduced to validate its instances, called RESTful choreographies. A systematic semi-automatic method for deriving RESTful choreographies from business process choreographies is proposed. The method employs natural language processing techniques to translate business interactions into RESTful interactions. The effectiveness of the approach is shown by developing a prototypical tool that evaluates the derivation method over a large number of choreography models. In addition, the thesis proposes solutions towards implementing RESTful choreographies. In particular, two RESTful service specifications are introduced for aiding, respectively, the execution of choreographies' exclusive gateways and the guidance of RESTful interactions.}, language = {en} } @article{VanHoutTachmazidouBackmanetal.2020, author = {Van Hout, Cristopher V. and Tachmazidou, Ioanna and Backman, Joshua D. and Hoffman, Joshua D. and Liu, Daren and Pandey, Ashutosh K. and Gonzaga-Jauregui, Claudia and Khalid, Shareef and Ye, Bin and Banerjee, Nilanjana and Li, Alexander H. and O'Dushlaine, Colm and Marcketta, Anthony and Staples, Jeffrey and Schurmann, Claudia and Hawes, Alicia and Maxwell, Evan and Barnard, Leland and Lopez, Alexander and Penn, John and Habegger, Lukas and Blumenfeld, Andrew L. and Bai, Xiaodong and O'Keeffe, Sean and Yadav, Ashish and Praveen, Kavita and Jones, Marcus and Salerno, William J. and Chung, Wendy K. and Surakka, Ida and Willer, Cristen J. and Hveem, Kristian and Leader, Joseph B. 
and Carey, David J. and Ledbetter, David H. and Cardon, Lon and Yancopoulos, George D. and Economides, Aris and Coppola, Giovanni and Shuldiner, Alan R. and Balasubramanian, Suganthi and Cantor, Michael and Nelson, Matthew R. and Whittaker, John and Reid, Jeffrey G. and Marchini, Jonathan and Overton, John D. and Scott, Robert A. and Abecasis, Goncalo R. and Yerges-Armstrong, Laura M. and Baras, Aris}, title = {Exome sequencing and characterization of 49,960 individuals in the UK Biobank}, series = {Nature : the international weekly journal of science}, volume = {586}, journal = {Nature : the international weekly journal of science}, number = {7831}, publisher = {Macmillan Publishers Limited}, address = {London}, organization = {Regeneron Genetics Ctr}, issn = {0028-0836}, doi = {10.1038/s41586-020-2853-0}, pages = {749 -- 756}, year = {2020}, abstract = {The UK Biobank is a prospective study of 502,543 individuals, combining extensive phenotypic and genotypic data with streamlined access for researchers around the world(1). Here we describe the release of exome-sequence data for the first 49,960 study participants, revealing approximately 4 million coding variants (of which around 98.6\% have a frequency of less than 1\%). The data include 198,269 autosomal predicted loss-of-function (LOF) variants, a more than 14-fold increase compared to the imputed sequence. Nearly all genes (more than 97\%) had at least one carrier with a LOF variant, and most genes (more than 69\%) had at least ten carriers with a LOF variant. We illustrate the power of characterizing LOF variants in this population through association analyses across 1,730 phenotypes. In addition to replicating established associations, we found novel LOF variants with large effects on disease traits, including PIEZO1 on varicose veins, COL6A1 on corneal resistance, MEPE on bone density, and IQGAP2 and GMPR on blood cell traits. 
We further demonstrate the value of exome sequencing by surveying the prevalence of pathogenic variants of clinical importance, and show that 2\% of this population has a medically actionable variant. Furthermore, we characterize the penetrance of cancer in carriers of pathogenic BRCA1 and BRCA2 variants. Exome sequences from the first 49,960 participants highlight the promise of genome sequencing in large population-based studies and are now accessible to the scientific community.
Exome sequences from the first 49,960 participants in the UK Biobank highlight the promise of genome sequencing in large population-based studies and are now accessible to the scientific community.}, language = {en} } @misc{SchlosserKossmannBoissier2019, author = {Schlosser, Rainer and Kossmann, Jan and Boissier, Martin}, title = {Efficient Scalable Multi-Attribute Index Selection Using Recursive Strategies}, series = {2019 IEEE 35th International Conference on Data Engineering (ICDE)}, journal = {2019 IEEE 35th International Conference on Data Engineering (ICDE)}, publisher = {IEEE}, address = {New York}, isbn = {978-1-5386-7474-1}, issn = {1084-4627}, doi = {10.1109/ICDE.2019.00113}, pages = {1238 -- 1249}, year = {2019}, abstract = {An efficient selection of indexes is indispensable for database performance. For large problem instances with hundreds of tables, existing approaches are not suitable: They either exhibit prohibitive runtimes or yield far from optimal index configurations by strongly limiting the set of index candidates or not handling index interaction explicitly. We introduce a novel recursive strategy that does not exclude index candidates in advance and effectively accounts for index interaction. Using large real-world workloads, we demonstrate the applicability of our approach. Further, we evaluate our solution end to end with a commercial database system using a reproducible setup. We show that our solutions are near-optimal for small index selection problems. For larger problems, our strategy outperforms state-of-the-art approaches in both scalability and solution quality.}, language = {en} } @article{ChristopherAshwoodBittremieuxDeutschetal.2020, author = {Ashwood, Christopher and Bittremieux, Wout and Deutsch, Eric W. and Doncheva, Nadezhda T. and Dorfer, Viktoria and Gabriels, Ralf and Gorshkov, Vladimir and Gupta, Surya and Jones, Andrew R. 
and K{\"a}ll, Lukas and Kopczynski, Dominik and Lane, Lydie and Lautenbacher, Ludwig and Legeay, Marc and Locard-Paulet, Marie and Mesuere, Bart and Sachsenberg, Timo and Salz, Renee and Samaras, Patroklos and Schiebenhoefer, Henning and Schmidt, Tobias and Schw{\"a}mmle, Veit and Soggiu, Alessio and Uszkoreit, Julian and Van Den Bossche, Tim and Van Puyvelde, Bart and Van Strien, Joeri and Verschaffelt, Pieter and Webel, Henry and Willems, Sander and Perez-Riverol, Yasset and Netz, Eugen and Pfeuffer, Julianus}, title = {Proceedings of the EuBIC-MS 2020 Developers' Meeting}, series = {EuPA Open Proteomics}, volume = {24}, journal = {EuPA Open Proteomics}, publisher = {Elsevier}, address = {Amsterdam}, issn = {2212-9685}, doi = {10.1016/j.euprot.2020.11.001}, pages = {1 -- 6}, year = {2020}, abstract = {The 2020 European Bioinformatics Community for Mass Spectrometry (EuBIC-MS) Developers' meeting was held from January 13th to January 17th 2020 in Nyborg, Denmark. Among the participants were scientists as well as developers working in the field of computational mass spectrometry (MS) and proteomics. The 4-day program was split between introductory keynote lectures and parallel hackathon sessions. During the latter, the participants developed bioinformatics tools and resources addressing outstanding needs in the community. The hackathons allowed less experienced participants to learn from more advanced computational MS experts, and to actively contribute to highly relevant research projects. We successfully produced several new tools that will be useful to the proteomics community by improving data analysis as well as facilitating future research. 
All keynote recordings are available on https://doi.org/10.5281/zenodo.3890181.}, language = {en} } @article{StojanovicTrappRichteretal.2019, author = {Stojanovic, Vladeta and Trapp, Matthias and Richter, Rico and D{\"o}llner, J{\"u}rgen Roland Friedrich}, title = {Service-oriented semantic enrichment of indoor point clouds using octree-based multiview classification}, series = {Graphical Models}, volume = {105}, journal = {Graphical Models}, publisher = {Elsevier}, address = {San Diego}, issn = {1524-0703}, doi = {10.1016/j.gmod.2019.101039}, pages = {18}, year = {2019}, abstract = {The use of Building Information Modeling (BIM) for Facility Management (FM) in the Operation and Maintenance (O\&M) stages of the building life-cycle is intended to bridge the gap between operations and digital data, but lacks the functionality of assessing the state of the built environment due to non-automated generation of associated semantics. 3D point clouds can be used to capture the physical state of the built environment, but also lack these associated semantics. A prototypical implementation of a service-oriented architecture for classification of indoor point cloud scenes of office environments is presented, using multiview classification. The multiview classification approach is tested using a retrained Convolutional Neural Network (CNN) model - Inception V3. The presented approach for classifying common office furniture objects (chairs, sofas and desks), contained in 3D point cloud scans, is tested and evaluated. The results show that the presented approach can classify common office furniture up to an acceptable degree of accuracy, and is suitable for quick and robust semantics approximation - based on RGB (red, green and blue color channel) cubemap images of the octree partitioned areas of the 3D point cloud scan. Additional methods for web-based 3D visualization, editing and annotation of point clouds are also discussed. 
Using the described approach, captured scans of indoor environments can be semantically enriched using object annotations derived from multiview classification results. Furthermore, the presented approach is suited for semantic enrichment of lower resolution indoor point clouds acquired using commodity mobile devices.}, language = {en} } @misc{FrickeDoellnerAsche2018, author = {Fricke, Andreas and D{\"o}llner, J{\"u}rgen Roland Friedrich and Asche, Hartmut}, title = {Servicification - Trend or Paradigm Shift in Geospatial Data Processing?}, series = {Computational Science and Its Applications - ICCSA 2018, PT III}, volume = {10962}, journal = {Computational Science and Its Applications - ICCSA 2018, PT III}, publisher = {Springer}, address = {Cham}, isbn = {978-3-319-95168-3}, issn = {0302-9743}, doi = {10.1007/978-3-319-95168-3_23}, pages = {339 -- 350}, year = {2018}, abstract = {Currently we are witnessing profound changes in the geospatial domain. Driven by recent ICT developments, such as web services, service-oriented computing or open-source software, an explosion of geodata and geospatial applications or rapidly growing communities of non-specialist users, the crucial issue is the provision and integration of geospatial intelligence in these rapidly changing, heterogeneous developments. This paper introduces the concept of Servicification into geospatial data processing. Its core idea is the provision of expertise through a flexible number of web-based software service modules. Selection and linkage of these services to user profiles, application tasks, data resources, or additional software allow for the compilation of flexible, time-sensitive geospatial data handling processes. Encapsulated in a string of discrete services, the approach presented here aims to provide non-specialist users with geospatial expertise required for the effective, professional solution of a defined application problem. 
Providing users with geospatial intelligence in the form of web-based, modular services, is a completely different approach to geospatial data processing. This novel concept puts geospatial intelligence, made available through services encapsulating rule bases and algorithms, in the centre and at the disposal of the users, regardless of their expertise.}, language = {en} } @misc{ReimannKlingbeilPasewaldtetal.2018, author = {Reimann, Max and Klingbeil, Mandy and Pasewaldt, Sebastian and Semmo, Amir and Trapp, Matthias and D{\"o}llner, J{\"u}rgen Roland Friedrich}, title = {MaeSTrO: A Mobile App for Style Transfer Orchestration using Neural Networks}, series = {International Conference on Cyberworlds (CW)}, journal = {International Conference on Cyberworlds (CW)}, editor = {Sourin, A Sourina}, publisher = {IEEE}, address = {New York}, isbn = {978-1-5386-7315-7}, doi = {10.1109/CW.2018.00016}, pages = {9 -- 16}, year = {2018}, abstract = {Mobile expressive rendering gained increasing popularity among users seeking casual creativity by image stylization and supports the development of mobile artists as a new user group. In particular, neural style transfer has advanced as a core technology to emulate characteristics of manifold artistic styles. However, when it comes to creative expression, the technology still faces inherent limitations in providing low-level controls for localized image stylization. This work enhances state-of-the-art neural style transfer techniques by a generalized user interface with interactive tools to facilitate a creative and localized editing process. Thereby, we first propose a problem characterization representing trade-offs between visual quality, run-time performance, and user control. We then present MaeSTrO, a mobile app for orchestration of neural style transfer techniques using iterative, multi-style generative and adaptive neural networks that can be locally controlled by on-screen painting metaphors. 
At this, first user tests indicate different levels of satisfaction for the implemented techniques and interaction design.}, language = {en} } @misc{LimbergerGroplerBuschmannetal.2018, author = {Limberger, Daniel and Gropler, Anne and Buschmann, Stefan and D{\"o}llner, J{\"u}rgen Roland Friedrich and Wasty, Benjamin}, title = {OpenLL}, series = {22nd International Conference Information Visualisation (IV)}, journal = {22nd International Conference Information Visualisation (IV)}, publisher = {IEEE}, address = {New York}, isbn = {978-1-5386-7202-0}, doi = {10.1109/iV.2018.00039}, pages = {175 -- 181}, year = {2018}, abstract = {Today's rendering APIs lack robust functionality and capabilities for dynamic, real-time text rendering and labeling, which represent key requirements for 3D application design in many fields. As a consequence, most rendering systems are barely or not at all equipped with respective capabilities. This paper drafts the unified text rendering and labeling API OpenLL intended to complement common rendering APIs, frameworks, and transmission formats. For it, various uses of static and dynamic placement of labels are showcased and a text interaction technique is presented. Furthermore, API design constraints with respect to state-of-the-art text rendering techniques are discussed. 
This contribution is intended to initiate a community-driven specification of a free and open label library.}, language = {en} } @article{ReimannKlingbeilPasewaldtetal.2019, author = {Reimann, Max and Klingbeil, Mandy and Pasewaldt, Sebastian and Semmo, Amir and Trapp, Matthias and D{\"o}llner, J{\"u}rgen Roland Friedrich}, title = {Locally controllable neural style transfer on mobile devices}, series = {The Visual Computer}, volume = {35}, journal = {The Visual Computer}, number = {11}, publisher = {Springer}, address = {New York}, issn = {0178-2789}, doi = {10.1007/s00371-019-01654-1}, pages = {1531 -- 1547}, year = {2019}, abstract = {Mobile expressive rendering gained increasing popularity among users seeking casual creativity by image stylization and supports the development of mobile artists as a new user group. In particular, neural style transfer has advanced as a core technology to emulate characteristics of manifold artistic styles. However, when it comes to creative expression, the technology still faces inherent limitations in providing low-level controls for localized image stylization. In this work, we first propose a problem characterization of interactive style transfer representing a trade-off between visual quality, run-time performance, and user control. We then present MaeSTrO, a mobile app for orchestration of neural style transfer techniques using iterative, multi-style generative and adaptive neural networks that can be locally controlled by on-screen painting metaphors. At this, we enhance state-of-the-art neural style transfer techniques by mask-based loss terms that can be interactively parameterized by a generalized user interface to facilitate a creative and localized editing process. 
We report on a usability study and an online survey that demonstrate the ability of our app to transfer styles at improved semantic plausibility.}, language = {en} } @article{VollmerTrappSchumannetal.2018, author = {Vollmer, Jan Ole and Trapp, Matthias and Schumann, Heidrun and D{\"o}llner, J{\"u}rgen Roland Friedrich}, title = {Hierarchical spatial aggregation for level-of-detail visualization of 3D thematic data}, series = {ACM transactions on spatial algorithms and systems}, volume = {4}, journal = {ACM transactions on spatial algorithms and systems}, number = {3}, publisher = {Association for Computing Machinery}, address = {New York}, issn = {2374-0353}, doi = {10.1145/3234506}, pages = {23}, year = {2018}, abstract = {Thematic maps are a common tool to visualize semantic data with a spatial reference. Combining thematic data with a geometric representation of their natural reference frame aids the viewer's ability in gaining an overview, as well as perceiving patterns with respect to location; however, as the amount of data for visualization continues to increase, problems such as information overload and visual clutter impede perception, requiring data aggregation and level-of-detail visualization techniques. While existing aggregation techniques for thematic data operate in a 2D reference frame (i.e., map), we present two aggregation techniques for 3D spatial and spatiotemporal data mapped onto virtual city models that hierarchically aggregate thematic data in real time during rendering to support on-the-fly and on-demand level-of-detail generation. An object-based technique performs aggregation based on scene-specific objects and their hierarchy to facilitate per-object analysis, while the scene-based technique aggregates data solely based on spatial locations, thus supporting visual analysis of data with arbitrary reference geometry. 
Both techniques can apply different aggregation functions (mean, minimum, and maximum) for ordinal, interval, and ratio-scaled data and can be easily extended with additional functions. Our implementation utilizes the programmable graphics pipeline and requires suitably encoded data, i.e., textures or vertex attributes. We demonstrate the application of both techniques using real-world datasets, including solar potential analyses and the propagation of pressure waves in a virtual city model.}, language = {en} } @article{ScheibelTrappLimbergeretal.2020, author = {Scheibel, Willy and Trapp, Matthias and Limberger, Daniel and D{\"o}llner, J{\"u}rgen Roland Friedrich}, title = {A taxonomy of treemap visualization techniques}, series = {Science and Technology Publications}, journal = {Science and Technology Publications}, publisher = {Springer}, address = {Berlin}, pages = {8}, year = {2020}, abstract = {A treemap is a visualization that has been specifically designed to facilitate the exploration of tree-structured data and, more general, hierarchically structured data. The family of visualization techniques that use a visual metaphor for parent-child relationships based "on the property of containment" (Johnson, 1993) is commonly referred to as treemaps. However, as the number of variations of treemaps grows, it becomes increasingly important to distinguish clearly between techniques and their specific characteristics. This paper proposes to discern between Space-filling Treemap TS, Containment Treemap TC, Implicit Edge Representation Tree TIE, and Mapped Tree TMT for classification of hierarchy visualization techniques and highlights their respective properties. This taxonomy is created as a hyponymy, i.e., its classes have an is-a relationship to one another: TS $\subset$ TC $\subset$ TIE $\subset$ TMT. 
With this proposal, we intend to stimulate a discussion on a more unambiguous classification of treemaps and, furthermore, broaden what is understood by the concept of treemap itself.}, language = {en} } @article{FehrJaramilloGutierrezOalaetal.2022, author = {Fehr, Jana and Jaramillo-Gutierrez, Giovanna and Oala, Luis and Gr{\"o}schel, Matthias I. and Bierwirth, Manuel and Balachandran, Pradeep and Werneck-Leite, Alixandro and Lippert, Christoph}, title = {Piloting a Survey-Based Assessment of Transparency and Trustworthiness with Three Medical AI Tools}, series = {Healthcare}, volume = {10}, journal = {Healthcare}, number = {10}, publisher = {MDPI}, address = {Basel, Schweiz}, issn = {2227-9032}, doi = {10.3390/healthcare10101923}, pages = {30}, year = {2022}, abstract = {Artificial intelligence (AI) offers the potential to support healthcare delivery, but poorly trained or validated algorithms bear risks of harm. Ethical guidelines stated transparency about model development and validation as a requirement for trustworthy AI. Abundant guidance exists to provide transparency through reporting, but poorly reported medical AI tools are common. To close this transparency gap, we developed and piloted a framework to quantify the transparency of medical AI tools with three use cases. Our framework comprises a survey to report on the intended use, training and validation data and processes, ethical considerations, and deployment recommendations. The transparency of each response was scored with either 0, 0.5, or 1 to reflect if the requested information was not, partially, or fully provided. Additionally, we assessed on an analogous three-point scale if the provided responses fulfilled the transparency requirement for a set of trustworthiness criteria from ethical guidelines. The degree of transparency and trustworthiness was calculated on a scale from 0\% to 100\%. 
Our assessment of three medical AI use cases pin-pointed reporting gaps and resulted in transparency scores of 67\% for two use cases and one with 59\%. We report anecdotal evidence that business constraints and limited information from external datasets were major obstacles to providing transparency for the three use cases. The observed transparency gaps also lowered the degree of trustworthiness, indicating compliance gaps with ethical guidelines. All three pilot use cases faced challenges to provide transparency about medical AI tools, but more studies are needed to investigate those in the wider medical AI sector. Applying this framework for an external assessment of transparency may be infeasible if business constraints prevent the disclosure of information. New strategies may be necessary to enable audits of medical AI tools while preserving business secrets.}, language = {en} } @article{KoetzingKrejca2019, author = {K{\"o}tzing, Timo and Krejca, Martin Stefan}, title = {First-hitting times under drift}, series = {Theoretical computer science}, volume = {796}, journal = {Theoretical computer science}, publisher = {Elsevier}, address = {Amsterdam}, issn = {0304-3975}, doi = {10.1016/j.tcs.2019.08.021}, pages = {51 -- 69}, year = {2019}, abstract = {For the last ten years, almost every theoretical result concerning the expected run time of a randomized search heuristic used drift theory, making it the arguably most important tool in this domain. Its success is due to its ease of use and its powerful result: drift theory allows the user to derive bounds on the expected first-hitting time of a random process by bounding expected local changes of the process - the drift. This is usually far easier than bounding the expected first-hitting time directly. Due to the widespread use of drift theory, it is of utmost importance to have the best drift theorems possible. 
We improve the fundamental additive, multiplicative, and variable drift theorems by stating them in a form as general as possible and providing examples of why the restrictions we keep are still necessary. Our additive drift theorem for upper bounds only requires the process to be lower-bounded, that is, we remove unnecessary restrictions like a finite, discrete, or bounded state space. As corollaries, the same is true for our upper bounds in the case of variable and multiplicative drift. By bounding the step size of the process, we derive new lower-bounding multiplicative and variable drift theorems. Last, we also state theorems that are applicable when the process has a drift of 0, by using a drift on the variance of the process.}, language = {en} } @article{FriedrichKoetzingKrejca2019, author = {Friedrich, Tobias and K{\"o}tzing, Timo and Krejca, Martin Stefan}, title = {Unbiasedness of estimation-of-distribution algorithms}, series = {Theoretical computer science}, volume = {785}, journal = {Theoretical computer science}, publisher = {Elsevier}, address = {Amsterdam}, issn = {0304-3975}, doi = {10.1016/j.tcs.2018.11.001}, pages = {46 -- 59}, year = {2019}, abstract = {In the context of black-box optimization, black-box complexity is used for understanding the inherent difficulty of a given optimization problem. Central to our understanding of nature-inspired search heuristics in this context is the notion of unbiasedness. Specialized black-box complexities have been developed in order to better understand the limitations of these heuristics - especially of (population-based) evolutionary algorithms (EAs). In contrast to this, we focus on a model for algorithms explicitly maintaining a probability distribution over the search space: so-called estimation-of-distribution algorithms (EDAs). We consider the recently introduced n-Bernoulli-lambda-EDA framework, which subsumes, for example, the commonly known EDAs PBIL, UMDA, lambda-MMAS(IB), and cGA. 
We show that an n-Bernoulli-lambda-EDA is unbiased if and only if its probability distribution satisfies a certain invariance property under isometric automorphisms of $[0, 1]^n$. By restricting how an n-Bernoulli-lambda-EDA can perform an update, in a way common to many examples, we derive conciser characterizations, which are easy to verify. We demonstrate this by showing that our examples above are all unbiased. (C) 2018 Elsevier B.V. All rights reserved.}, language = {en} } @misc{KoetzingKrejca2018, author = {K{\"o}tzing, Timo and Krejca, Martin Stefan}, title = {First-Hitting times under additive drift}, series = {Parallel Problem Solving from Nature - PPSN XV, PT II}, volume = {11102}, journal = {Parallel Problem Solving from Nature - PPSN XV, PT II}, publisher = {Springer}, address = {Cham}, isbn = {978-3-319-99259-4}, issn = {0302-9743}, doi = {10.1007/978-3-319-99259-4_8}, pages = {92 -- 104}, year = {2018}, abstract = {For the last ten years, almost every theoretical result concerning the expected run time of a randomized search heuristic used drift theory, making it the arguably most important tool in this domain. Its success is due to its ease of use and its powerful result: drift theory allows the user to derive bounds on the expected first-hitting time of a random process by bounding expected local changes of the process - the drift. This is usually far easier than bounding the expected first-hitting time directly. Due to the widespread use of drift theory, it is of utmost importance to have the best drift theorems possible. We improve the fundamental additive, multiplicative, and variable drift theorems by stating them in a form as general as possible and providing examples of why the restrictions we keep are still necessary. Our additive drift theorem for upper bounds only requires the process to be nonnegative, that is, we remove unnecessary restrictions like a finite, discrete, or bounded search space. 
As corollaries, the same is true for our upper bounds in the case of variable and multiplicative drift.}, language = {en} }
@comment{The following entry carries the key suffix "a" because the preceding PPSN XV paper by the same authors already uses the unsuffixed key; BibTeX rejects repeated keys and would silently drop this record.}
@misc{KoetzingKrejca2018a, author = {K{\"o}tzing, Timo and Krejca, Martin Stefan}, title = {First-Hitting times for finite state spaces}, series = {Parallel Problem Solving from Nature - PPSN XV, PT II}, volume = {11102}, journal = {Parallel Problem Solving from Nature - PPSN XV, PT II}, publisher = {Springer}, address = {Cham}, isbn = {978-3-319-99259-4}, issn = {0302-9743}, doi = {10.1007/978-3-319-99259-4_7}, pages = {79 -- 91}, year = {2018}, abstract = {One of the most important aspects of a randomized algorithm is bounding its expected run time on various problems. Formally speaking, this means bounding the expected first-hitting time of a random process. The two arguably most popular tools to do so are the fitness level method and drift theory. The fitness level method considers arbitrary transition probabilities but only allows the process to move toward the goal. On the other hand, drift theory allows the process to move into any direction as long as it moves closer to the goal in expectation; however, this tendency has to be monotone and, thus, the transition probabilities cannot be arbitrary. We provide a result that combines the benefit of these two approaches: our result gives a lower and an upper bound for the expected first-hitting time of a random process over $\{0, \ldots, n\}$ that is allowed to move forward and backward by 1 and can use arbitrary transition probabilities. In case that the transition probabilities are known, our bounds coincide and yield the exact value of the expected first-hitting time. 
Further, we also state the stationary distribution as well as the mixing time of a special case of our scenario.}, language = {en} } @phdthesis{Dyck2020, author = {Dyck, Johannes}, title = {Verification of graph transformation systems with k-inductive invariants}, doi = {10.25932/publishup-44274}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-442742}, school = {Universit{\"a}t Potsdam}, pages = {X, 364}, year = {2020}, abstract = {With rising complexity of today's software and hardware systems and the hypothesized increase in autonomous, intelligent, and self-* systems, developing correct systems remains an important challenge. Testing, although an important part of the development and maintenance process, cannot usually establish the definite correctness of a software or hardware system - especially when systems have arbitrarily large or infinite state spaces or an infinite number of initial states. This is where formal verification comes in: given a representation of the system in question in a formal framework, verification approaches and tools can be used to establish the system's adherence to its similarly formalized specification, and to complement testing. One such formal framework is the field of graphs and graph transformation systems. Both are powerful formalisms with well-established foundations and ongoing research that can be used to describe complex hardware or software systems with varying degrees of abstraction. Since their inception in the 1970s, graph transformation systems have continuously evolved; related research spans extensions of expressive power, graph algorithms, and their implementation, application scenarios, or verification approaches, to name just a few topics. This thesis focuses on a verification approach for graph transformation systems called k-inductive invariant checking, which is an extension of previous work on 1-inductive invariant checking. 
Instead of exhaustively computing a system's state space, which is a common approach in model checking, 1-inductive invariant checking symbolically analyzes graph transformation rules - i.e. system behavior - in order to draw conclusions with respect to the validity of graph constraints in the system's state space. The approach is based on an inductive argument: if a system's initial state satisfies a graph constraint and if all rules preserve that constraint's validity, we can conclude the constraint's validity in the system's entire state space - without having to compute it. However, inductive invariant checking also comes with a specific drawback: the locality of graph transformation rules leads to a lack of context information during the symbolic analysis of potential rule applications. This thesis argues that this lack of context can be partly addressed by using k-induction instead of 1-induction. A k-inductive invariant is a graph constraint whose validity in a path of k-1 rule applications implies its validity after any subsequent rule application - as opposed to a 1-inductive invariant where only one rule application is taken into account. Considering a path of transformations then accumulates more context of the graph rules' applications. As such, this thesis extends existing research and implementation on 1-inductive invariant checking for graph transformation systems to k-induction. In addition, it proposes a technique to perform the base case of the inductive argument in a symbolic fashion, which allows verification of systems with an infinite set of initial states. Both k-inductive invariant checking and its base case are described in formal terms. Based on that, this thesis formulates theorems and constructions to apply this general verification approach for typed graph transformation systems and nested graph constraints - and to formally prove the approach's correctness. 
Since unrestricted graph constraints may lead to non-termination or impracticably high execution times given a hypothetical implementation, this thesis also presents a restricted verification approach, which limits the form of graph transformation systems and graph constraints. It is formalized, proven correct, and its procedures terminate by construction. This restricted approach has been implemented in an automated tool and has been evaluated with respect to its applicability to test cases, its performance, and its degree of completeness.}, language = {en} } @phdthesis{Harmouch2020, author = {Harmouch, Hazar}, title = {Single-column data profiling}, doi = {10.25932/publishup-47455}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-474554}, school = {Universit{\"a}t Potsdam}, pages = {x, 115}, year = {2020}, abstract = {The research area of data profiling consists of a large set of methods and processes to examine a given dataset and determine metadata about it. Typically, different data profiling tasks address different kinds of metadata, comprising either various statistics about individual columns (Single-column Analysis) or relationships among them (Dependency Discovery). Among the basic statistics about a column are data type, header, the number of unique values (the column's cardinality), maximum and minimum values, the number of null values, and the value distribution. Dependencies involve, for instance, functional dependencies (FDs), inclusion dependencies (INDs), and their approximate versions. Data profiling has a wide range of conventional use cases, namely data exploration, cleansing, and integration. The produced metadata is also useful for database management and schema reverse engineering. Data profiling has also more novel use cases, such as big data analytics. The generated metadata describes the structure of the data at hand, how to import it, what it is about, and how much of it there is. 
Thus, data profiling can be considered as an important preparatory task for many data analysis and mining scenarios to assess which data might be useful and to reveal and understand a new dataset's characteristics. In this thesis, the main focus is on the single-column analysis class of data profiling tasks. We study the impact and the extraction of three of the most important metadata about a column, namely the cardinality, the header, and the number of null values. First, we present a detailed experimental study of twelve cardinality estimation algorithms. We classify the algorithms and analyze their efficiency, scaling far beyond the original experiments and testing theoretical guarantees. Our results highlight their trade-offs and point out the possibility to create a parallel or a distributed version of these algorithms to cope with the growing size of modern datasets. Then, we present a fully automated, multi-phase system to discover human-understandable, representative, and consistent headers for a target table in cases where headers are missing, meaningless, or unrepresentative for the column values. Our evaluation on Wikipedia tables shows that 60\% of the automatically discovered schemata are exact and complete. Considering more schema candidates, top-5 for example, increases this percentage to 72\%. Finally, we formally and experimentally show the ghost and fake FDs phenomenon caused by FD discovery over datasets with missing values. We propose two efficient scores, probabilistic and likelihood-based, for estimating the genuineness of a discovered FD. Our extensive set of experiments on real-world and semi-synthetic datasets show the effectiveness and efficiency of these scores.}, language = {en} } @misc{KoetzingLagodzinskiLengleretal.2018, author = {K{\"o}tzing, Timo and Lagodzinski, Gregor J. A. 
and Lengler, Johannes and Melnichenko, Anna}, title = {Destructiveness of Lexicographic Parsimony Pressure and Alleviation by a Concatenation Crossover in Genetic Programming}, series = {Parallel Problem Solving from Nature - PPSN XV}, volume = {11102}, journal = {Parallel Problem Solving from Nature - PPSN XV}, publisher = {Springer}, address = {Cham}, isbn = {978-3-319-99259-4}, issn = {0302-9743}, doi = {10.1007/978-3-319-99259-4_4}, pages = {42 -- 54}, year = {2018}, abstract = {For theoretical analyses there are two specifics distinguishing GP from many other areas of evolutionary computation. First, the variable size representations, in particular yielding a possible bloat (i.e. the growth of individuals with redundant parts). Second, the role and realization of crossover, which is particularly central in GP due to the tree-based representation. Whereas some theoretical work on GP has studied the effects of bloat, crossover had surprisingly little share in this work. We analyze a simple crossover operator in combination with local search, where a preference for small solutions minimizes bloat (lexicographic parsimony pressure); the resulting algorithm is denoted Concatenation Crossover GP. For this purpose three variants of the well-studied Majority test function with large plateaus are considered. We show that the Concatenation Crossover GP can efficiently optimize these test functions, while local search cannot be efficient for all three variants independent of employing bloat control.}, language = {en} } @phdthesis{Mandal2019, author = {Mandal, Sankalita}, title = {Event handling in business processes}, doi = {10.25932/publishup-44170}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-441700}, school = {Universit{\"a}t Potsdam}, pages = {xix, 151}, year = {2019}, abstract = {Business process management (BPM) deals with modeling, executing, monitoring, analyzing, and improving business processes. 
During execution, the process communicates with its environment to get relevant contextual information represented as events. Recent development of big data and the Internet of Things (IoT) enables sources like smart devices and sensors to generate tons of events which can be filtered, grouped, and composed to trigger and drive business processes. The industry standard Business Process Model and Notation (BPMN) provides several event constructs to capture the interaction possibilities between a process and its environment, e.g., to instantiate a process, to abort an ongoing activity in an exceptional situation, to take decisions based on the information carried by the events, as well as to choose among the alternative paths for further process execution. The specifications of such interactions are termed as event handling. However, in a distributed setup, the event sources are most often unaware of the status of process execution and therefore, an event is produced irrespective of the process being ready to consume it. BPMN semantics does not support such scenarios and thus increases the chance of processes getting delayed or getting in a deadlock by missing out on event occurrences which might still be relevant. The work in this thesis reviews the challenges and shortcomings of integrating real-world events into business processes, especially the subscription management. The basic integration is achieved with an architecture consisting of a process modeler, a process engine, and an event processing platform. Further, points of subscription and unsubscription along the process execution timeline are defined for different BPMN event constructs. Semantic and temporal dependencies among event subscription, event occurrence, event consumption and event unsubscription are considered. 
To this end, an event buffer with policies for updating the buffer, retrieving the most suitable event for the current process instance, and reusing the event has been discussed that supports issuing of early subscription. The Petri net mapping of the event handling model provides our approach with a translation of semantics from a business process perspective. Two applications based on this formal foundation are presented to support the significance of different event handling configurations on correct process execution and reachability of a process path. Prototype implementations of the approaches show that realizing flexible event handling is feasible with minor extensions of off-the-shelf process engines and event platforms.}, language = {en} } @phdthesis{Taeumel2020, author = {Taeumel, Marcel}, title = {Data-driven tool construction in exploratory programming environments}, doi = {10.25932/publishup-44428}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-444289}, school = {Universit{\"a}t Potsdam}, pages = {xiv, 299}, year = {2020}, abstract = {This work presents a new design for programming environments that promote the exploration of domain-specific software artifacts and the construction of graphical tools for such program comprehension tasks. In complex software projects, tool building is essential because domain- or task-specific tools can support decision making by representing concerns concisely with low cognitive effort. In contrast, generic tools can only support anticipated scenarios, which usually align with programming language concepts or well-known project domains. However, the creation and modification of interactive tools is expensive because the glue that connects data to graphics is hard to find, change, and test. Even if valuable data is available in a common format and even if promising visualizations could be populated, programmers have to invest many resources to make changes in the programming environment. 
Consequently, only ideas of predictably high value will be implemented. In the non-graphical, command-line world, the situation looks different and inspiring: programmers can easily build their own tools as shell scripts by configuring and combining filter programs to process data. We propose a new perspective on graphical tools and provide a concept to build and modify such tools with a focus on high quality, low effort, and continuous adaptability. That is, (1) we propose an object-oriented, data-driven, declarative scripting language that reduces the amount of and governs the effects of glue code for view-model specifications, and (2) we propose a scalable UI-design language that promotes short feedback loops in an interactive, graphical environment such as Morphic known from Self or Squeak/Smalltalk systems. We implemented our concept as a tool building environment, which we call VIVIDE, on top of Squeak/Smalltalk and Morphic. We replaced existing code browsing and debugging tools to iterate within our solution more quickly. In several case studies with undergraduate and graduate students, we observed that VIVIDE can be applied to many domains such as live language development, source-code versioning, modular code browsing, and multi-language debugging. Then, we designed a controlled experiment to measure the effect on the time to build tools. Several pilot runs showed that training is crucial and, presumably, takes days or weeks, which implies a need for further research. As a result, programmers as users can directly work with tangible representations of their software artifacts in the VIVIDE environment. Tool builders can write domain-specific scripts to populate views to approach comprehension tasks from different angles. 
Our novel perspective on graphical tools can inspire the creation of new trade-offs in modularity for both data providers and view designers.}, language = {en} } @article{Schlosser2016, author = {Schlosser, Rainer}, title = {Stochastic dynamic pricing and advertising in isoelastic oligopoly models}, series = {European Journal of Operational Research}, volume = {259}, journal = {European Journal of Operational Research}, publisher = {Elsevier}, address = {Amsterdam}, issn = {0377-2217}, doi = {10.1016/j.ejor.2016.11.021}, pages = {1144 -- 1155}, year = {2016}, abstract = {In this paper, we analyze stochastic dynamic pricing and advertising differential games in special oligopoly markets with constant price and advertising elasticity. We consider the sale of perishable as well as durable goods and include adoption effects in the demand. Based on a unique stochastic feedback Nash equilibrium, we derive closed-form solution formulas of the value functions and the optimal feedback policies of all competing firms. Efficient simulation techniques are used to evaluate optimally controlled sales processes over time. This way, the evolution of optimal controls as well as the firms' profit distributions are analyzed. Moreover, we are able to compare feedback solutions of the stochastic model with its deterministic counterpart. We show that the market power of the competing firms is exactly the same as in the deterministic version of the model. Further, we discover two fundamental effects that determine the relation between both models. First, the volatility in demand results in a decline of expected profits compared to the deterministic model. Second, we find that saturation effects in demand have an opposite character. We show that the second effect can be strong enough to either exactly balance or even overcompensate the first one. 
As a result we are able to identify cases in which feedback solutions of the deterministic model provide useful approximations of solutions of the stochastic model.}, language = {en} } @article{PerscheidSiegmundTaeumeletal.2017, author = {Perscheid, Michael and Siegmund, Benjamin and Taeumel, Marcel and Hirschfeld, Robert}, title = {Studying the advancement in debugging practice of professional software developers}, series = {Software Quality Journal}, volume = {25}, journal = {Software Quality Journal}, publisher = {Springer}, address = {Dordrecht}, issn = {0963-9314}, doi = {10.1007/s11219-015-9294-2}, pages = {83 -- 110}, year = {2017}, abstract = {In 1997, Henry Lieberman stated that debugging is the dirty little secret of computer science. Since then, several promising debugging technologies have been developed such as back-in-time debuggers and automatic fault localization methods. However, the last study about the state-of-the-art in debugging is still more than 15 years old and so it is not clear whether these new approaches have been applied in practice or not. For that reason, we investigate the current state of debugging in a comprehensive study. First, we review the available literature and learn about current approaches and study results. Second, we observe several professional developers while debugging and interview them about their experiences. Third, we create a questionnaire that serves as the basis for a larger online debugging survey. 
Based on these results, we present new insights into debugging practice that help to suggest new directions for future research.}, language = {en} } @article{BobdaYongaGebseretal.2018, author = {Bobda, Christophe and Yonga, Franck and Gebser, Martin and Ishebabi, Harold and Schaub, Torsten H.}, title = {High-level synthesis of on-chip multiprocessor architectures based on answer set programming}, series = {Journal of Parallel and Distributed Computing}, volume = {117}, journal = {Journal of Parallel and Distributed Computing}, publisher = {Elsevier}, address = {San Diego}, issn = {0743-7315}, doi = {10.1016/j.jpdc.2018.02.010}, pages = {161 -- 179}, year = {2018}, abstract = {We present a system-level synthesis approach for heterogeneous multi-processor on chip, based on Answer Set Programming(ASP). Starting with a high-level description of an application, its timing constraints and the physical constraints of the target device, our goal is to produce the optimal computing infrastructure made of heterogeneous processors, peripherals, memories and communication components. Optimization aims at maximizing speed, while minimizing chip area. Also, a scheduler must be produced that fulfills the real-time requirements of the application. Even though our approach will work for application specific integrated circuits, we have chosen FPGA as target device in this work because of their reconfiguration capabilities which makes it possible to explore several design alternatives. This paper addresses the bottleneck of problem representation size by providing a direct and compact ASP encoding for automatic synthesis that is semantically equivalent to previously established ILP and ASP models. We describe a use-case in which designers specify their applications in C/C++ from which optimum systems can be derived. We demonstrate the superiority of our approach toward existing heuristics and exact methods with synthesis results on a set of realistic case studies. 
(C) 2018 Elsevier Inc. All rights reserved.}, language = {en} } @article{MenningGrasnickEwaldetal.2018, author = {Menning, Axel and Grasnick, Bastien M. and Ewald, Benedikt and Dobrigkeit, Franziska and Nicolai, Claudia}, title = {Verbal focus shifts}, series = {Design Studies}, volume = {57}, journal = {Design Studies}, publisher = {Elsevier}, address = {Oxford}, issn = {0142-694X}, doi = {10.1016/j.destud.2018.03.003}, pages = {135 -- 155}, year = {2018}, abstract = {Previous studies on design behaviour indicate that focus shifts positively influence ideational productivity. In this study we want to take a closer look at how these focus shifts look on the verbal level. We describe a mutually influencing relationship between mental focus shifts and verbal low coherent statements. In a case study based on the DTRS11 dataset we identify 297 low coherent statements via a combined topic modelling and manual approach. We introduce a categorization of the different instances of low coherent statements. The results indicate that designers tend to shift topics within an existing design issue instead of completely disrupting it. (C) 2018 Elsevier Ltd. All rights reserved.}, language = {en} }
@comment{In the following title the acronym uBPMN is brace-protected so that bibliography styles applying sentence casing do not lowercase it.}
@article{YousfiHeweltBaueretal.2018, author = {Yousfi, Alaaeddine and Hewelt, Marcin and Bauer, Christine and Weske, Mathias}, title = {Toward {uBPMN}-Based patterns for modeling ubiquitous business processes}, series = {IEEE Transactions on Industrial Informatics}, volume = {14}, journal = {IEEE Transactions on Industrial Informatics}, number = {8}, publisher = {Inst. of Electr. and Electronics Engineers}, address = {Piscataway}, issn = {1551-3203}, doi = {10.1109/TII.2017.2777847}, pages = {3358 -- 3367}, year = {2018}, abstract = {Ubiquitous business processes are the new generation of processes that pervade the physical space and interact with their environments using a minimum of human involvement. 
Although they are now widely deployed in the industry, their deployment is still ad hoc. They are implemented after an arbitrary modeling phase or no modeling phase at all. The absence of a solid modeling phase backing up the implementation generates many loopholes that are stressed in the literature. Here, we tackle the issue of modeling ubiquitous business processes. We propose patterns to represent the recent ubiquitous computing features. These patterns are the outcome of an analysis we conducted in the field of human-computer interaction to examine how the features are actually deployed. The patterns' understandability, ease-of-use, usefulness, and completeness are examined via a user experiment. The results indicate that these four indexes are on the positive track. Hence, the patterns may be the backbone of ubiquitous business process modeling in industrial applications.}, language = {en} }
@comment{In the following title the term RESTful is brace-protected so that bibliography styles applying sentence casing do not lowercase it.}
@article{NikajWeskeMendling2019, author = {Nikaj, Adriatik and Weske, Mathias and Mendling, Jan}, title = {Semi-automatic derivation of {RESTful} choreographies from business process choreographies}, series = {Software and systems modeling}, volume = {18}, journal = {Software and systems modeling}, number = {2}, publisher = {Springer}, address = {Heidelberg}, issn = {1619-1366}, doi = {10.1007/s10270-017-0653-2}, pages = {1195 -- 1208}, year = {2019}, abstract = {Enterprises reach out for collaborations with other organizations in order to offer complex products and services to the market. Such collaboration and coordination between different organizations, for a good share, is facilitated by information technology. The BPMN process choreography is a modeling language for specifying the exchange of information and services between different organizations at the business level. Recently, there is a surging use of the REST architectural style for the provisioning of services on the web, but few systematic engineering approaches to design their collaboration. 
In this paper, we address this gap in a comprehensive way by defining a semi-automatic method for the derivation of RESTful choreographies from process choreographies. The method is based on natural language analysis techniques to derive interactions from the textual information in process choreographies. The proposed method is evaluated in terms of effectiveness resulting in the intervention of a web engineer in only about 10\% of all generated RESTful interactions.}, language = {en} } @article{SchlosserRichly2019, author = {Schlosser, Rainer and Richly, Keven}, title = {Dynamic pricing under competition with data-driven price anticipations and endogenous reference price effects}, series = {Journal of revenue and pricing management}, volume = {18}, journal = {Journal of revenue and pricing management}, number = {6}, publisher = {Palgrave Macmillan}, address = {Basingstoke}, issn = {1476-6930}, doi = {10.1057/s41272-019-00206-5}, pages = {451 -- 464}, year = {2019}, abstract = {Online markets have become highly dynamic and competitive. Many sellers use automated data-driven strategies to estimate demand and to update prices frequently. Further, notification services offered by marketplaces allow to continuously track markets and to react to competitors' price adjustments instantaneously. To derive successful automated repricing strategies is challenging as competitors' strategies are typically not known. In this paper, we analyze automated repricing strategies with data-driven price anticipations under duopoly competition. In addition, we account for reference price effects in demand, which are affected by the price adjustments of both competitors. We show how to derive optimized self-adaptive pricing strategies that anticipate price reactions of the competitor and take the evolution of the reference price into account. We verify that the results of our adaptive learning strategy tend to optimal solutions, which can be derived for scenarios with full information. 
Finally, we analyze the case in which our learning strategy is played against itself. We find that our self-adaptive strategies can be used to approximate equilibria in mixed strategies.}, language = {en} } @article{PufahlWeske2019, author = {Pufahl, Luise and Weske, Mathias}, title = {Batch activity: enhancing business process modeling and enactment with batch processing}, series = {Computing}, volume = {101}, journal = {Computing}, number = {12}, publisher = {Springer}, address = {Wien}, issn = {0010-485X}, doi = {10.1007/s00607-019-00717-4}, pages = {1909 -- 1933}, year = {2019}, abstract = {Organizations strive for efficiency in their business processes by process improvement and automation. Business process management (BPM) supports these efforts by capturing business processes in process models serving as blueprint for a number of process instances. In BPM, process instances are typically considered running independently of each other. However, batch processing---the collective execution of several instances at specific process activities---is a common phenomenon in operational processes to reduce cost or time. Currently, batch processing is organized manually or hard-coded in software. For allowing stakeholders to explicitly represent their batch configurations in process models and their automatic execution, this paper provides a concept for batch activities and describes the corresponding execution semantics. The batch activity concept is evaluated in a two-step approach: a prototypical implementation in an existing BPM System proves its feasibility. Additionally, batch activities are applied to different use cases in a simulated environment. Its application implies cost-savings when a suitable batch configuration is selected. 
The batch activity concept contributes to practice by allowing the specification of batch work in process models and their automatic execution, and to research by extending the existing process modeling concepts.}, language = {en} }
@comment{In the following title the acronyms BPMN and DMN are brace-protected so that bibliography styles applying sentence casing do not lowercase them.}
@article{BazhenovaZerbatoOlibonietal.2019, author = {Bazhenova, Ekaterina and Zerbato, Francesca and Oliboni, Barbara and Weske, Mathias}, title = {From {BPMN} process models to {DMN} decision models}, series = {Information systems}, volume = {83}, journal = {Information systems}, publisher = {Elsevier}, address = {Amsterdam}, issn = {0306-4379}, doi = {10.1016/j.is.2019.02.001}, pages = {69 -- 88}, year = {2019}, abstract = {The interplay between process and decision models plays a crucial role in business process management, as decisions may be based on running processes and affect process outcomes. Often process models include decisions that are encoded through process control flow structures and data flow elements, thus reducing process model maintainability. The Decision Model and Notation (DMN) was proposed to achieve separation of concerns and to possibly complement the Business Process Model and Notation (BPMN) for designing decisions related to process models. Nevertheless, deriving decision models from process models remains challenging, especially when the same data underlie both process and decision models. In this paper, we explore how and to which extent the data modeled in BPMN processes and used for decision-making may be represented in the corresponding DMN decision models. To this end, we identify a set of patterns that capture possible representations of data in BPMN processes and that can be used to guide the derivation of decision models related to existing process models. Throughout the paper we refer to real-world healthcare processes to show the applicability of the proposed approach. (C) 2019 Elsevier Ltd. 
All rights reserved.}, language = {en} }

% Conference paper: typed as inproceedings with the proceedings title in
% `booktitle`, so that the venue is actually printed by standard styles.
% NOTE(review): `pages = {6}` looks like a page count rather than a page number -- confirm.
@inproceedings{DiazMendezSchoelzel2018,
  author    = {Diaz, Sergio and Mendez, Diego and Sch{\"o}lzel, Mario},
  title     = {Dynamic {Gallager-Humblet-Spira} Algorithm for Wireless Sensor Networks},
  booktitle = {2018 IEEE Colombian Conference on Communications and Computing (COLCOM)},
  publisher = {IEEE},
  address   = {New York},
  isbn      = {978-1-5386-6820-7},
  pages     = {6},
  year      = {2018},
  abstract  = {The problem of constructing and maintaining a tree topology in a distributed manner is a challenging task in WSNs. This is because the nodes have limited computational and memory resources and the network changes over time. We propose the Dynamic Gallager-Humblet-Spira (D-GHS) algorithm that builds and maintains a minimum spanning tree. To do so, we divide D-GHS into four phases, namely neighbor discovery, tree construction, data collection, and tree maintenance. In the neighbor discovery phase, the nodes collect information about their neighbors and the link quality. In the tree construction, D-GHS finds the minimum spanning tree by executing the Gallager-Humblet-Spira algorithm. In the data collection phase, the sink roots the minimum spanning tree at itself, and each node sends data packets. In the tree maintenance phase, the nodes repair the tree when communication failures occur.
The emulation results show that D-GHS reduces the number of control messages and the energy consumption, at the cost of a slight increase in memory size and convergence time.}, language = {en} }

% Conference (workshop) paper: inproceedings with the proceedings title in `booktitle`.
@inproceedings{BoissierKurzynski2018,
  author    = {Boissier, Martin and Kurzynski, Daniel},
  title     = {Workload-Driven Horizontal Partitioning and Pruning for Large {HTAP} Systems},
  booktitle = {2018 IEEE 34th International Conference on Data Engineering Workshops (ICDEW)},
  publisher = {IEEE},
  address   = {New York},
  isbn      = {978-1-5386-6306-6},
  doi       = {10.1109/ICDEW.2018.00026},
  pages     = {116--121},
  year      = {2018},
  abstract  = {Modern server systems with large NUMA architectures necessitate (i) data being distributed over the available computing nodes and (ii) NUMA-aware query processing to enable effective parallel processing in database systems. As these architectures incur significant latency and throughput penalties for accessing non-local data, queries should be executed as close as possible to the data. To further increase both performance and efficiency, data that is not relevant for the query result should be skipped as early as possible. One way to achieve this goal is horizontal partitioning to improve static partition pruning. As part of our ongoing work on workload-driven partitioning, we have implemented a recent approach called aggressive data skipping and extended it to handle both analytical as well as transactional access patterns. In this paper, we evaluate this approach with the workload and data of a production enterprise system of a Global 2000 company.
The results show that over 80\% of all tuples can be skipped in average while the resulting partitioning schemata are surprisingly stable over time.}, language = {en} }

% Conference paper in a numbered proceedings series. The exported `series`
% repeated the proceedings title; it now names the series that the ISSN and
% volume belong to. NOTE(review): series name derived from ISSN 1865-0929 -- confirm.
@inproceedings{PatalasMaliszewskaKrebs2018,
  author    = {Patalas-Maliszewska, Justyna and Krebs, Irene},
  title     = {An Information System Supporting the Eliciting of Expert Knowledge for Successful {IT} Projects},
  booktitle = {Information and Software Technologies, ICIST 2018},
  series    = {Communications in Computer and Information Science},
  volume    = {920},
  publisher = {Springer},
  address   = {Berlin},
  isbn      = {978-3-319-99972-2},
  issn      = {1865-0929},
  doi       = {10.1007/978-3-319-99972-2_1},
  pages     = {3--13},
  year      = {2018},
  abstract  = {In order to guarantee the success of an IT project, it is necessary for a company to possess expert knowledge. The difficulty arises when experts no longer work for the company and it then becomes necessary to use their knowledge, in order to realise an IT project. In this paper, the ExKnowIT information system which supports the eliciting of expert knowledge for successful IT projects, is presented and consists of the following modules: (1) the identification of experts for successful IT projects, (2) the eliciting of expert knowledge on completed IT projects, (3) the expert knowledge base on completed IT projects, (4) the Group Method for Data Handling (GMDH) algorithm, (5) new knowledge in support of decisions regarding the selection of a manager for a new IT project.
The added value of our system is that these three approaches, namely, the elicitation of expert knowledge, the success of an IT project and the discovery of new knowledge, gleaned from the expert knowledge base, otherwise known as the decision model, complement each other.}, language = {en} }

% No venue is recorded for this item, so it stays a misc entry.
% NOTE(review): `pages = {2}` looks like a page count rather than a page number -- confirm.
@misc{IonBaudisch2018,
  author    = {Ion, Alexandra and Baudisch, Patrick Markus},
  title     = {Metamaterial Devices},
  publisher = {Association for Computing Machinery},
  address   = {New York},
  isbn      = {978-1-4503-5819-4},
  doi       = {10.1145/3214822.3214827},
  pages     = {2},
  year      = {2018},
  abstract  = {In our hands-on demonstration, we show several objects, the functionality of which is defined by the objects' internal micro-structure. Such metamaterial machines can (1) be mechanisms based on their microstructures, (2) employ simple mechanical computation, or (3) change their outside to interact with their environment. They are 3D printed from one piece and we support their creating by providing interactive software tools.},
  language  = {en}
}

% Workshop paper in a numbered proceedings series. The exported `series`
% repeated the proceedings title; it now names the series that the ISSN and
% volume belong to. NOTE(review): series name derived from ISSN 1865-1348 -- confirm.
@inproceedings{BazhenovaZerbatoWeske2018,
  author    = {Bazhenova, Ekaterina and Zerbato, Francesca and Weske, Mathias},
  title     = {Data-Centric Extraction of {DMN} Decision Models from {BPMN} Process Models},
  booktitle = {Business Process Management Workshops},
  series    = {Lecture Notes in Business Information Processing},
  volume    = {308},
  publisher = {Springer},
  address   = {Berlin},
  isbn      = {978-3-319-74030-0},
  issn      = {1865-1348},
  doi       = {10.1007/978-3-319-74030-0_43},
  pages     = {542--555},
  year      = {2018},
  abstract  = {Operational decisions in business processes can be modeled by using the Decision Model and Notation (DMN). The complementary use of DMN for decision modeling and of the Business Process Model and Notation (BPMN) for process design realizes the separation of concerns principle. For supporting separation of concerns during the design phase, it is crucial to understand which aspects of decision-making enclosed in a process model should be captured by a dedicated decision model.
Whereas existing work focuses on the extraction of decision models from process control flow, the connection of process-related data and decision models is still unexplored. In this paper, we investigate how process-related data used for making decisions can be represented in process models and we distinguish a set of BPMN patterns capturing such information. Then, we provide a formal mapping of the identified BPMN patterns to corresponding DMN models and apply our approach to a real-world healthcare process.}, language = {en} }

% Chapter in an edited book (the record carries an ISBN and a chapter DOI),
% hence incollection with the book title in `booktitle` instead of article/journal.
@incollection{ReinTaeumelHirschfeld2017,
  author    = {Rein, Patrick and Taeumel, Marcel and Hirschfeld, Robert},
  title     = {Making the domain tangible},
  booktitle = {Design Thinking Research},
  publisher = {Springer},
  address   = {New York},
  isbn      = {978-3-319-60967-6},
  doi       = {10.1007/978-3-319-60967-6_9},
  pages     = {171--194},
  year      = {2017},
  abstract  = {Programmers collaborate continuously with domain experts to explore the problem space and to shape a solution that fits the users' needs. In doing so, all parties develop a shared vocabulary, which is above all a list of named concepts and their relationships to each other. Nowadays, many programmers favor object-oriented programming because it allows them to directly represent real-world concepts and interactions from the vocabulary as code. However, when existing domain data is not yet represented as objects, it becomes a challenge to initially bring existing domain data into object-oriented systems and to keep the source code readable. While source code might be comprehensible to programmers, domain experts can struggle, given their non-programming background. We present a new approach to provide a mapping of existing data sources into the object-oriented programming environment. We support keeping the code of the domain model compact and readable while adding implicit means to access external information as internal domain objects.
This should encourage programmers to explore different ways to build the software system quickly. Eventually, our approach fosters communication with the domain experts, especially at the beginning of a project. When the details in the problem space are not yet clear, the source code provides a valuable, tangible communication artifact.}, language = {en} }

% Conference paper: inproceedings with the proceedings title in `booktitle`.
@inproceedings{NeubauerWankoSchaubetal.2018,
  author    = {Neubauer, Kai and Wanko, Philipp and Schaub, Torsten H. and Haubelt, Christian},
  title     = {Exact multi-objective design space exploration using {ASPmT}},
  booktitle = {Proceedings of the 2018 Design, Automation \& Test in Europe Conference \& Exhibition (DATE)},
  publisher = {IEEE},
  address   = {New York},
  isbn      = {978-3-9819-2630-9},
  issn      = {1530-1591},
  doi       = {10.23919/DATE.2018.8342014},
  pages     = {257--260},
  year      = {2018},
  abstract  = {An efficient Design Space Exploration (DSE) is imperative for the design of modern, highly complex embedded systems in order to steer the development towards optimal design points. The early evaluation of design decisions at system-level abstraction layer helps to find promising regions for subsequent development steps in lower abstraction levels by diminishing the complexity of the search problem. In recent works, symbolic techniques, especially Answer Set Programming (ASP) modulo Theories (ASPmT), have been shown to find feasible solutions of highly complex system-level synthesis problems with non-linear constraints very efficiently. In this paper, we present a novel approach to a holistic system-level DSE based on ASPmT. To this end, we include additional background theories that concurrently guarantee compliance with hard constraints and perform the simultaneous optimization of several design objectives. We implement and compare our approach with a state-of-the-art preference handling framework for ASP.
Experimental results indicate that our proposed method produces better solutions with respect to both diversity and convergence to the true Pareto front.}, language = {en} }

% Workshop paper in a numbered proceedings series. The exported `series`
% repeated the proceedings title; it now names the series that the ISSN and
% volume belong to. NOTE(review): series name derived from ISSN 1865-1348 -- confirm.
@inproceedings{PufahlWongWeske2018,
  author    = {Pufahl, Luise and Wong, Tsun Yin and Weske, Mathias},
  title     = {Design of an extensible {BPMN} process simulator},
  booktitle = {Business Process Management Workshops (BPM 2017)},
  series    = {Lecture Notes in Business Information Processing},
  volume    = {308},
  publisher = {Springer},
  address   = {Berlin},
  isbn      = {978-3-319-74030-0},
  issn      = {1865-1348},
  doi       = {10.1007/978-3-319-74030-0_62},
  pages     = {782--795},
  year      = {2018},
  abstract  = {Business process simulation is an important means for quantitative analysis of a business process and to compare different process alternatives. With the Business Process Model and Notation (BPMN) being the state-of-the-art language for the graphical representation of business processes, many existing process simulators support already the simulation of BPMN diagrams. However, they do not provide well-defined interfaces to integrate new concepts in the simulation environment. In this work, we present the design and architecture of a proof-of-concept implementation of an open and extensible BPMN process simulator. It also supports the simulation of multiple BPMN processes at a time and relies on the building blocks of the well-founded discrete event simulation. The extensibility is assured by a plug-in concept.
Its feasibility is demonstrated by extensions supporting new BPMN concepts, such as the simulation of business rule activities referencing decision models and batch activities.}, language = {en} }

% Journal article. The exported `series` merely repeated `journal` and was dropped.
@article{RischKrestel2019,
  author    = {Risch, Julian and Krestel, Ralf},
  title     = {Domain-specific word embeddings for patent classification},
  journal   = {Data Technologies and Applications},
  volume    = {53},
  number    = {1},
  publisher = {Emerald Group Publishing Limited},
  address   = {Bingley},
  issn      = {2514-9288},
  doi       = {10.1108/DTA-01-2019-0002},
  pages     = {108--122},
  year      = {2019},
  abstract  = {Purpose Patent offices and other stakeholders in the patent domain need to classify patent applications according to a standardized classification scheme. The purpose of this paper is to examine the novelty of an application it can then be compared to previously granted patents in the same class. Automatic classification would be highly beneficial, because of the large volume of patents and the domain-specific knowledge needed to accomplish this costly manual task. However, a challenge for the automation is patent-specific language use, such as special vocabulary and phrases. Design/methodology/approach To account for this language use, the authors present domain-specific pre-trained word embeddings for the patent domain. The authors train the model on a very large data set of more than 5m patents and evaluate it at the task of patent classification. To this end, the authors propose a deep learning approach based on gated recurrent units for automatic patent classification built on the trained word embeddings. Findings Experiments on a standardized evaluation data set show that the approach increases average precision for patent classification by 17 percent compared to state-of-the-art approaches. In this paper, the authors further investigate the model's strengths and weaknesses.
An extensive error analysis reveals that the learned embeddings indeed mirror patent-specific language use. The imbalanced training data and underrepresented classes are the most difficult remaining challenge. Originality/value The proposed approach fulfills the need for domain-specific word embeddings for downstream tasks in the patent domain, such as patent classification or patent analysis.}, language = {en} }

% Chapter in an edited book (the record carries an ISBN, no volume/issue),
% hence incollection with the book title in `booktitle`.
@incollection{MoeringdeMutiis2019,
  author    = {M{\"o}ring, Sebastian and de Mutiis, Marco},
  title     = {Camera Ludica},
  booktitle = {Intermedia games - Games inter media : Video games and intermediality},
  publisher = {Bloomsbury Academic},
  address   = {New York},
  isbn      = {978-1-5013-3051-3},
  pages     = {69--93},
  year      = {2019},
  language  = {en}
}

% Conference paper: inproceedings with the proceedings title in `booktitle`.
% The venue string uses the full "2019 IEEE/ACM 41st ..." form that the other
% entry with the same ISBN already carries, so the proceedings are named once.
@inproceedings{Matthies2019,
  author    = {Matthies, Christoph},
  title     = {Agile process improvement in retrospectives},
  booktitle = {2019 IEEE/ACM 41st International Conference on Software Engineering: Companion Proceedings (ICSE-Companion)},
  publisher = {IEEE},
  address   = {New York},
  isbn      = {978-1-7281-1764-5},
  issn      = {2574-1934},
  doi       = {10.1109/ICSE-Companion.2019.00063},
  pages     = {150--152},
  year      = {2019},
  abstract  = {Working in iterations and repeatedly improving team workflows based on collected feedback is fundamental to agile software development processes. Scrum, the most popular agile method, provides dedicated retrospective meetings to reflect on the last development iteration and to decide on process improvement actions. However, agile methods do not prescribe how these improvement actions should be identified, managed or tracked in detail. The approaches to detect and remove problems in software development processes are therefore often based on intuition and prior experiences and perceptions of team members.
Previous research in this area has focused on approaches to elicit a team's improvement opportunities as well as measurements regarding the work performed in an iteration, e.g. Scrum burn-down charts. Little research deals with the quality and nature of identified problems or how progress towards removing issues is measured. In this research, we investigate how agile development teams in the professional software industry organize their feedback and process improvement approaches. In particular, we focus on the structure and content of improvement and reflection meetings, i.e. retrospectives, and their outcomes. Researching how the vital mechanism of process improvement is implemented in practice in modern software development leads to a more complete picture of agile process improvement.}, language = {en} }

% Key carries the suffix `a`: the preceding entry ("Agile process improvement in
% retrospectives") already uses Matthies2019, and a repeated key makes BibTeX
% report an error and drop this entry. The first entry keeps its key, so
% existing citations resolve as before.
@inproceedings{Matthies2019a,
  author    = {Matthies, Christoph},
  title     = {Feedback in {Scrum}},
  booktitle = {2019 IEEE/ACM 41st International Conference on Software Engineering: Companion Proceedings (ICSE-Companion)},
  publisher = {IEEE},
  address   = {New York},
  isbn      = {978-1-7281-1764-5},
  issn      = {2574-1934},
  doi       = {10.1109/ICSE-Companion.2019.00081},
  pages     = {198--201},
  year      = {2019},
  abstract  = {Improving the way that teams work together by reflecting and improving the executed process is at the heart of agile processes. The idea of iterative process improvement takes various forms in different agile development methodologies, e.g. Scrum Retrospectives. However, these methods do not prescribe how improvement steps should be conducted in detail. In this research we investigate how agile software teams can use their development data, such as commits or tickets, created during regular development activities, to drive and track process improvement steps.
Our previous research focused on data-informed process improvement in the context of student teams, where controlled circumstances and deep domain knowledge allowed creation and usage of specific process measures. Encouraged by positive results in this area, we investigate the process improvement approaches employed in industry teams. Researching how the vital mechanism of process improvement is implemented and how development data is already being used in practice in modern software development leads to a more complete picture of agile process improvement. It is the first step in enabling a data-informed feedback and improvement process, tailored to a team's context and based on the development data of individual teams.}, language = {en} }

% Conference paper. The export named the hosting platform ("IEEE Xplore") as
% the venue; `booktitle` now names the proceedings the DOI belongs to.
% NOTE(review): proceedings title inferred from the DOI prefix 10.1109/SASO.2019 -- confirm.
@inproceedings{BrandGiese2019,
  author    = {Brand, Thomas and Giese, Holger},
  title     = {Generic adaptive monitoring based on executed architecture runtime model queries and events},
  booktitle = {2019 IEEE 13th International Conference on Self-Adaptive and Self-Organizing Systems (SASO)},
  publisher = {IEEE},
  address   = {New York},
  isbn      = {978-1-7281-2731-6},
  issn      = {1949-3673},
  doi       = {10.1109/SASO.2019.00012},
  pages     = {17--22},
  year      = {2019},
  abstract  = {Monitoring is a key functionality for automated decision making as it is performed by self-adaptive systems, too. Effective monitoring provides the relevant information on time. This can be achieved with exhaustive monitoring causing a high overhead consumption of economical and ecological resources. In contrast, our generic adaptive monitoring approach supports effectiveness with increased efficiency. Also, it adapts to changes regarding the information demand and the monitored system without additional configuration and software implementation effort. The approach observes the executions of runtime model queries and processes change events to determine the currently required monitoring configuration.
In this paper we explicate different possibilities to use the approach and evaluate their characteristics regarding the phenomenon detection time and the monitoring effort. Our approach allows balancing between those two characteristics. This makes it an interesting option for the monitoring function of self-adaptive systems because for them usually very short-lived phenomena are not relevant.}, language = {en} }

% Conference paper. The exported venue was the fragment "Scale"; `booktitle`
% now carries the proceedings title, with the at-sign braced.
% NOTE(review): proceedings title inferred from the DOI prefix 10.1145/3330430 -- confirm.
% NOTE(review): `pages = {4}` looks like a page count rather than a page number -- confirm.
@inproceedings{BruechnerRenzKlingbeil2019,
  author    = {Bruechner, Dominik and Renz, Jan and Klingbeil, Mandy},
  title     = {Creating a Framework for User-Centered Development and Improvement of Digital Education},
  booktitle = {Proceedings of the Sixth (2019) {ACM} Conference on Learning {@} Scale},
  publisher = {Association for Computing Machinery},
  address   = {New York},
  isbn      = {978-1-4503-6804-9},
  doi       = {10.1145/3330430.3333644},
  pages     = {4},
  year      = {2019},
  abstract  = {We investigate how the technology acceptance and learning experience of the digital education platform HPI Schul-Cloud (HPI School Cloud) for German secondary school teachers can be improved by proposing a user-centered research and development framework. We highlight the importance of developing digital learning technologies in a user-centered way to take differences in the requirements of educators and students into account. We suggest applying qualitative and quantitative methods to build a solid understanding of a learning platform's users, their needs, requirements, and their context of use. After concept development and idea generation of features and areas of opportunity based on the user research, we emphasize on the application of a multi-attribute utility analysis decision-making framework to prioritize ideas rationally, taking results of user research into account. Afterward, we recommend applying the principle build-learn-iterate to build prototypes in different resolutions while learning from user tests and improving the selected opportunities.
Last but not least, we propose an approach for continuous short- and long-term user experience controlling and monitoring, extending existing web- and learning analytics metrics.}, language = {en} }

% Conference paper: inproceedings with the proceedings title in `booktitle`.
@inproceedings{BiloFriedrichLenzneretal.2019,
  author    = {Bilo, Davide and Friedrich, Tobias and Lenzner, Pascal and Melnichenko, Anna},
  title     = {Geometric Network Creation Games},
  booktitle = {SPAA '19: The 31st ACM Symposium on Parallelism in Algorithms and Architectures},
  publisher = {Association for Computing Machinery},
  address   = {New York},
  isbn      = {978-1-4503-6184-2},
  doi       = {10.1145/3323165.3323199},
  pages     = {323--332},
  year      = {2019},
  abstract  = {Network Creation Games are a well-known approach for explaining and analyzing the structure, quality and dynamics of real-world networks like the Internet and other infrastructure networks which evolved via the interaction of selfish agents without a central authority. In these games selfish agents which correspond to nodes in a network strategically buy incident edges to improve their centrality. However, past research on these games has only considered the creation of networks with unit-weight edges. In practice, e.g. when constructing a fiber-optic network, the choice of which nodes to connect and also the induced price for a link crucially depends on the distance between the involved nodes and such settings can be modeled via edge-weighted graphs. We incorporate arbitrary edge weights by generalizing the well-known model by Fabrikant et al. [PODC'03] to edge-weighted host graphs and focus on the geometric setting where the weights are induced by the distances in some metric space.
In stark contrast to the state-of-the-art for the unit-weight version, where the Price of Anarchy is conjectured to be constant and where resolving this is a major open problem, we prove a tight non-constant bound on the Price of Anarchy for the metric version and a slightly weaker upper bound for the non-metric case. Moreover, we analyze the existence of equilibria, the computational hardness and the game dynamics for several natural metrics. The model we propose can be seen as the game-theoretic analogue of a variant of the classical Network Design Problem. Thus, low-cost equilibria of our game correspond to decentralized and stable approximations of the optimum network design.}, language = {en} }

% Conference paper in a numbered proceedings series. The exported `series`
% repeated the proceedings title; it now names the series that the ISSN and
% volume belong to. NOTE(review): series name derived from ISSN 1865-1348 -- confirm.
@inproceedings{GonzalezLopezPufahl2019,
  author    = {Gonzalez-Lopez, Fernanda and Pufahl, Luise},
  title     = {A Landscape for Case Models},
  booktitle = {Enterprise, Business-Process and Information Systems Modeling},
  series    = {Lecture Notes in Business Information Processing},
  volume    = {352},
  publisher = {Springer},
  address   = {Berlin},
  isbn      = {978-3-030-20618-5},
  issn      = {1865-1348},
  doi       = {10.1007/978-3-030-20618-5_6},
  pages     = {87--102},
  year      = {2019},
  abstract  = {Case Management is a paradigm to support knowledge-intensive processes. The different approaches developed for modeling these types of processes tend to result in scattered models due to the low abstraction level at which the inherently complex processes are therein represented. Thus, readability and understandability is more challenging than that of traditional process models. By reviewing existing proposals in the field of process overviews and case models, this paper extends a case modeling language - the fragment-based Case Management (fCM) language - with the goal of modeling knowledge-intensive processes from a higher abstraction level - to generate a so-called fCM landscape. This proposal is empirically evaluated via an online experiment.
Results indicate that interpreting an fCM landscape might be more effective and efficient than interpreting an informationally equivalent case model.}, language = {en} }

% Journal article. The exported `series` merely repeated `journal` and was dropped.
@article{LambersBornKosioletal.2018,
  author    = {Lambers, Leen and Born, Kristopher and Kosiol, Jens and Str{\"u}ber, Daniel and Taentzer, Gabriele},
  title     = {Granularity of conflicts and dependencies in graph transformation systems},
  journal   = {Journal of Logical and Algebraic Methods in Programming},
  volume    = {103},
  publisher = {Elsevier},
  address   = {New York},
  issn      = {2352-2208},
  doi       = {10.1016/j.jlamp.2018.11.004},
  pages     = {105--129},
  year      = {2018},
  abstract  = {Conflict and dependency analysis (CDA) is a static analysis for the detection of conflicting and dependent rule applications in a graph transformation system. The state-of-the-art CDA technique, critical pair analysis, provides all potential conflicts and dependencies in minimal context as critical pairs, for each pair of rules. Yet, critical pairs can be hard to understand; users are mainly interested in core information about conflicts and dependencies occurring in various combinations. In this paper, we present an approach to conflicts and dependencies in graph transformation systems based on two dimensions of granularity. The first dimension refers to the overlap considered between the rules of a given rule pair; the second one refers to the represented amount of context information about transformations in which the conflicts occur. We introduce a variety of new conflict notions, in particular, conflict atoms, conflict reasons, and minimal conflict reasons, relate them to the existing conflict notions of critical pairs and initial conflicts, and position all of these notions within our granularity approach. Finally, we introduce dual concepts for dependency analysis. As we discuss in a running example, our approach paves the way for an improved CDA technique.
(C) 2018 Elsevier Inc. All rights reserved.}, language = {en} }

% Journal article. The exported `series` merely repeated `journal` and was
% dropped; the acronym in the title is brace-protected against down-casing.
@article{GebserKaminskiKaufmannetal.2018,
  author    = {Gebser, Martin and Kaminski, Roland and Kaufmann, Benjamin and Schaub, Torsten H.},
  title     = {Multi-shot {ASP} solving with clingo},
  journal   = {Theory and practice of logic programming},
  volume    = {19},
  number    = {1},
  publisher = {Cambridge Univ. Press},
  address   = {New York},
  issn      = {1471-0684},
  doi       = {10.1017/S1471068418000054},
  pages     = {27--82},
  year      = {2018},
  abstract  = {We introduce a new flexible paradigm of grounding and solving in Answer Set Programming (ASP), which we refer to as multi-shot ASP solving, and present its implementation in the ASP system clingo. Multi-shot ASP solving features grounding and solving processes that deal with continuously changing logic programs. In doing so, they remain operative and accommodate changes in a seamless way. For instance, such processes allow for advanced forms of search, as in optimization or theory solving, or interaction with an environment, as in robotics or query answering. Common to them is that the problem specification evolves during the reasoning process, either because data or constraints are added, deleted, or replaced. This evolutionary aspect adds another dimension to ASP since it brings about state changing operations. We address this issue by providing an operational semantics that characterizes grounding and solving processes in multi-shot ASP solving. This characterization provides a semantic account of grounder and solver states along with the operations manipulating them. The operative nature of multi-shot solving avoids redundancies in relaunching grounder and solver programs and benefits from the solver's learning capacities. clingo accomplishes this by complementing ASP's declarative input language with control capacities.
On the declarative side, a new directive allows for structuring logic programs into named and parameterizable subprograms. The grounding and integration of these subprograms into the solving process is completely modular and fully controllable from the procedural side. To this end, clingo offers a new application programming interface that is conveniently accessible via scripting languages. By strictly separating logic and control, clingo also abolishes the need for dedicated systems for incremental and reactive reasoning, like iclingo and oclingo, respectively, and its flexibility goes well beyond the advanced yet still rigid solving processes of the latter.}, language = {en} }

% Journal article. The exported `series` merely repeated `journal` and was dropped.
@article{SchlosserWaltherBoissieretal.2019,
  author    = {Schlosser, Rainer and Walther, Carsten and Boissier, Martin and Uflacker, Matthias},
  title     = {Automated repricing and ordering strategies in competitive markets},
  journal   = {AI communications : AICOM ; the European journal on artificial intelligence},
  volume    = {32},
  number    = {1},
  publisher = {IOS Press},
  address   = {Amsterdam},
  issn      = {0921-7126},
  doi       = {10.3233/AIC-180603},
  pages     = {15--29},
  year      = {2019},
  abstract  = {Merchants on modern e-commerce platforms face a highly competitive environment. They compete against each other using automated dynamic pricing and ordering strategies. Successfully managing both inventory levels as well as offer prices is a challenging task as (i) demand is uncertain, (ii) competitors strategically interact, and (iii) optimized pricing and ordering decisions are mutually dependent. We show how to derive optimized data-driven pricing and ordering strategies which are based on demand learning techniques and efficient dynamic optimization models. We verify the superior performance of our self-adaptive strategies by comparing them to different rule-based as well as data-driven strategies in duopoly and oligopoly settings.
Further, to study and to optimize joint dynamic ordering and pricing strategies on online marketplaces, we built an interactive simulation platform. To be both flexible and scalable, the platform has a microservice-based architecture and allows handling dozens of competing merchants and streams of consumers with configurable characteristics.}, language = {en} }

% Conference paper: inproceedings with the proceedings title in `booktitle`.
@inproceedings{HalfpapSchlosser2019,
  author    = {Halfpap, Stefan and Schlosser, Rainer},
  title     = {Workload-Driven Fragment Allocation for Partially Replicated Databases Using Linear Programming},
  booktitle = {2019 IEEE 35th International Conference on Data Engineering (ICDE)},
  publisher = {IEEE},
  address   = {New York},
  isbn      = {978-1-5386-7474-1},
  issn      = {1084-4627},
  doi       = {10.1109/ICDE.2019.00188},
  pages     = {1746--1749},
  year      = {2019},
  abstract  = {In replication schemes, replica nodes can process read-only queries on snapshots of the master node without violating transactional consistency. By analyzing the workload, we can identify query access patterns and replicate data depending to its access frequency. In this paper, we define a linear programming (LP) model to calculate the set of partial replicas with the lowest overall memory capacity while evenly balancing the query load. Furthermore, we propose a scalable decomposition heuristic to calculate solutions for larger problem sizes.
While guaranteeing the same performance as state-of-the-art heuristics, our decomposition approach calculates allocations with up to 23\% lower memory footprint for the TPC-H benchmark.},
  language  = {en}
}

@inproceedings{HalfpapSchlosser2019b,
  author    = {Halfpap, Stefan and Schlosser, Rainer},
  title     = {A Comparison of Allocation Algorithms for Partially Replicated Databases},
  booktitle = {2019 IEEE 35th International Conference on Data Engineering (ICDE)},
  publisher = {IEEE},
  address   = {New York},
  isbn      = {978-1-5386-7474-1},
  issn      = {1084-4627},
  doi       = {10.1109/ICDE.2019.00226},
  pages     = {2008--2011},
  year      = {2019},
  internal-note = {Key was a duplicate of HalfpapSchlosser2019 (the fragment-allocation paper); suffixed with b so both entries are citable.},
  abstract  = {Increasing demand for analytical processing capabilities can be managed by replication approaches. However, to evenly balance the replicas' workload shares while at the same time minimizing the data replication factor is a highly challenging allocation problem. As optimal solutions are only applicable for small problem instances, effective heuristics are indispensable. In this paper, we test and compare state-of-the-art allocation algorithms for partial replication. By visualizing and exploring their (heuristic) solutions for different benchmark workloads, we are able to derive structural insights and to detect an algorithm's strengths as well as its potential for improvement.
Further, our application enables end-to-end evaluations of different allocations to verify their theoretical performance.},
  language  = {en}
}

@article{FriedrichKrejcaRothenbergeretal.2019,
  author    = {Friedrich, Tobias and Krejca, Martin Stefan and Rothenberger, Ralf and Arndt, Tobias and Hafner, Danijar and Kellermeier, Thomas and Krogmann, Simon and Razmjou, Armin},
  title     = {Routing for on-street parking search using probabilistic data},
  journal   = {AI communications : AICOM ; the European journal on artificial intelligence},
  volume    = {32},
  number    = {2},
  publisher = {IOS Press},
  address   = {Amsterdam},
  issn      = {0921-7126},
  doi       = {10.3233/AIC-180574},
  pages     = {113--124},
  year      = {2019},
  abstract  = {A significant percentage of urban traffic is caused by the search for parking spots. One possible approach to improve this situation is to guide drivers along routes which are likely to have free parking spots. The task of finding such a route can be modeled as a probabilistic graph problem which is NP-complete. Thus, we propose heuristic approaches for solving this problem and evaluate them experimentally. For this, we use probabilities of finding a parking spot, which are based on publicly available empirical data from TomTom International B.V. Additionally, we propose a heuristic that relies exclusively on conventional road attributes. Our experiments show that this algorithm comes close to the baseline by a factor of 1.3 in our cost measure.
Last, we complement our experiments with results from a field study, comparing the success rates of our algorithms against real human drivers.},
  language  = {en}
}

@inproceedings{ChakrabortyHammerBugiel2019,
  author    = {Chakraborty, Dhiman and Hammer, Christian and Bugiel, Sven},
  title     = {Secure Multi-Execution in {Android}},
  booktitle = {Proceedings of the 34th ACM/SIGAPP Symposium on Applied Computing},
  publisher = {Association for Computing Machinery},
  address   = {New York},
  isbn      = {978-1-4503-5933-7},
  doi       = {10.1145/3297280.3297469},
  pages     = {1934--1943},
  year      = {2019},
  abstract  = {Mobile operating systems, such as Google's Android, have become a fixed part of our daily lives and are entrusted with a plethora of private information. Congruously, their data protection mechanisms have been improved steadily over the last decade and, in particular, for Android, the research community has explored various enhancements and extensions to the access control model. However, the vast majority of those solutions has been concerned with controlling the access to data, but equally important is the question of how to control the flow of data once released. Ignoring control over the dissemination of data between applications or between components of the same app, opens the door for attacks, such as permission re-delegation or privacy-violating third-party libraries. Controlling information flows is a long-standing problem, and one of the most recent and practical-oriented approaches to information flow control is secure multi-execution. In this paper, we present Ariel, the design and implementation of an IFC architecture for Android based on the secure multi-execution of apps.
Ariel demonstrably extends Android's system with support for executing multiple instances of apps, and it is equipped with a policy lattice derived from the protection levels of Android's permissions as well as an I/O scheduler to achieve control over data flows between application instances. We demonstrate how secure multi-execution with Ariel can help to mitigate two prominent attacks on Android, permission re-delegations and malicious advertisement libraries.},
  language  = {en}
}

@inproceedings{WelearegaiSchlueterHammer2019,
  author    = {Welearegai, Gebrehiwet B. and Schlueter, Max and Hammer, Christian},
  title     = {Static security evaluation of an industrial web application},
  booktitle = {Proceedings of the 34th ACM/SIGAPP Symposium on Applied Computing},
  publisher = {Association for Computing Machinery},
  address   = {New York},
  isbn      = {978-1-4503-5933-7},
  doi       = {10.1145/3297280.3297471},
  pages     = {1952--1961},
  year      = {2019},
  abstract  = {JavaScript is the most popular programming language for web applications. Static analysis of JavaScript applications is highly challenging due to its dynamic language constructs and event-driven asynchronous executions, which also give rise to many security-related bugs. Several static analysis tools to detect such bugs exist, however, research has not yet reported much on the precision and scalability trade-off of these analyzers. As a further obstacle, JavaScript programs structured in Node.js modules need to be collected for analysis, but existing bundlers are either specific to their respective analysis tools or not particularly suitable for static analysis.},
  language  = {en}
}

@inproceedings{Friedrich2019,
  author    = {Friedrich, Tobias},
  title     = {From graph theory to network science},
  booktitle = {36th International Symposium on Theoretical Aspects of Computer Science (STACS 2019)},
  volume    = {126},
  publisher = {Schloss Dagstuhl-Leibniz-Zentrum f{\"u}r Informatik},
  address   = {Dagstuhl},
  isbn      = {978-3-95977-100-9},
  doi       = {10.4230/LIPIcs.STACS.2019.5},
  pages     = {9},
  year      = {2019},
  internal-note = {pages = 9 looks like a page count rather than a page range; confirm against the publisher record.},
  abstract  = {Network science is driven by the question which properties large real-world networks have and how we can exploit them algorithmically. In the past few years, hyperbolic graphs have emerged as a very promising model for scale-free networks. The connection between hyperbolic geometry and complex networks gives insights in both directions: (1) Hyperbolic geometry forms the basis of a natural and explanatory model for real-world networks. Hyperbolic random graphs are obtained by choosing random points in the hyperbolic plane and connecting pairs of points that are geometrically close. The resulting networks share many structural properties for example with online social networks like Facebook or Twitter. They are thus well suited for algorithmic analyses in a more realistic setting. (2) Starting with a real-world network, hyperbolic geometry is well-suited for metric embeddings. The vertices of a network can be mapped to points in this geometry, such that geometric distances are similar to graph distances.
Such embeddings have a variety of algorithmic applications ranging from approximations based on efficient geometric algorithms to greedy routing solely using hyperbolic coordinates for navigation decisions.},
  language  = {en}
}

@inproceedings{BrandGiese2019,
  author    = {Brand, Thomas and Giese, Holger Burkhard},
  title     = {Towards Generic Adaptive Monitoring},
  booktitle = {2018 IEEE 12th International Conference on Self-Adaptive and Self-Organizing Systems (SASO)},
  publisher = {IEEE},
  address   = {New York},
  isbn      = {978-1-5386-5172-8},
  issn      = {1949-3673},
  doi       = {10.1109/SASO.2018.00027},
  pages     = {156--161},
  year      = {2019},
  abstract  = {Monitoring is a key prerequisite for self-adaptive software and many other forms of operating software. Monitoring relevant lower level phenomena like the occurrences of exceptions and diagnosis data requires to carefully examine which detailed information is really necessary and feasible to monitor. Adaptive monitoring permits observing a greater variety of details with less overhead, if most of the time the MAPE-K loop can operate using only a small subset of all those details. However, engineering such an adaptive monitoring is a major engineering effort on its own that further complicates the development of self-adaptive software. The proposed approach overcomes the outlined problems by providing generic adaptive monitoring via runtime models. It reduces the effort to introduce and apply adaptive monitoring by avoiding additional development effort for controlling the monitoring adaptation.
Although the generic approach is independent from the monitoring purpose, it still allows for substantial savings regarding the monitoring resource consumption as demonstrated by an example.},
  language  = {en}
}

@inproceedings{PlauthPolze2018,
  author    = {Plauth, Max and Polze, Andreas},
  title     = {Towards improving data transfer efficiency for accelerators using hardware compression},
  booktitle = {Sixth International Symposium on Computing and Networking Workshops (CANDARW)},
  publisher = {IEEE},
  address   = {New York},
  isbn      = {978-1-5386-9184-7},
  doi       = {10.1109/CANDARW.2018.00031},
  pages     = {125--131},
  year      = {2018},
  abstract  = {The overhead of moving data is the major limiting factor in today's hardware, especially in heterogeneous systems where data needs to be transferred frequently between host and accelerator memory. With the increasing availability of hardware-based compression facilities in modern computer architectures, this paper investigates the potential of hardware-accelerated I/O Link Compression as a promising approach to reduce data volumes and transfer time, thus improving the overall efficiency of accelerators in heterogeneous systems. Our considerations are focused on On-the-Fly compression in both Single-Node and Scale-Out deployments. Based on a theoretical analysis, this paper demonstrates the feasibility of hardware-accelerated On-the-Fly I/O Link Compression for many workloads in a Scale-Out scenario, and for some even in a Single-Node scenario.
These findings are confirmed in a preliminary evaluation using software- and hardware-based implementations of the 842 compression algorithm.},
  language  = {en}
}

@inproceedings{MatthiesTeusnerHesse2018,
  author    = {Matthies, Christoph and Teusner, Ralf and Hesse, G{\"u}nter},
  title     = {Beyond Surveys},
  booktitle = {2018 IEEE Frontiers in Education Conference (FIE)},
  publisher = {IEEE},
  address   = {New York},
  isbn      = {978-1-5386-1174-6},
  issn      = {0190-5848},
  pages     = {9},
  year      = {2018},
  internal-note = {pages = 9 looks like a page count rather than a page range; no DOI recorded; confirm against the publisher record.},
  language  = {en}
}

@inproceedings{TeusnerMatthiesStaubitz2018,
  author    = {Teusner, Ralf and Matthies, Christoph and Staubitz, Thomas},
  title     = {What Stays in Mind?},
  booktitle = {2018 IEEE Frontiers in Education Conference (FIE)},
  publisher = {IEEE},
  address   = {New York},
  isbn      = {978-1-5386-1174-6},
  issn      = {0190-5848},
  doi       = {10.1109/FIE.2018.8658890},
  pages     = {9},
  year      = {2018},
  internal-note = {pages = 9 looks like a page count rather than a page range; confirm against the publisher record.},
  language  = {en}
}

@inproceedings{Richly2019,
  author    = {Richly, Keven},
  title     = {A survey on trajectory data management for hybrid transactional and analytical workloads},
  booktitle = {IEEE International Conference on Big Data (Big Data)},
  publisher = {IEEE},
  address   = {New York},
  isbn      = {978-1-5386-5035-6},
  issn      = {2639-1589},
  doi       = {10.1109/BigData.2018.8622394},
  pages     = {562--569},
  year      = {2019},
  abstract  = {Rapid advances in location-acquisition technologies have led to large amounts of trajectory data. This data is the foundation for a broad spectrum of services driven and improved by trajectory data mining. However, for hybrid transactional and analytical workloads, the storing and processing of rapidly accumulated trajectory data is a non-trivial task. In this paper, we present a detailed survey about state-of-the-art trajectory data management systems.
To determine the relevant aspects and requirements for such systems, we developed a trajectory data mining framework, which summarizes the different steps in the trajectory data mining process. Based on the derived requirements, we analyze different concepts to store, compress, index, and process spatio-temporal data. There are various trajectory management systems, which are optimized for scalability, data footprint reduction, elasticity, or query performance. To get a comprehensive overview, we describe and compare different exciting systems. Additionally, the observed similarities in the general structure of different systems are consolidated in a general blueprint of trajectory management systems.},
  language  = {en}
}

@inproceedings{Richly2019b,
  author    = {Richly, Keven},
  title     = {Leveraging spatio-temporal soccer data to define a graphical query language for game recordings},
  booktitle = {IEEE International Conference on Big Data (Big Data)},
  publisher = {IEEE},
  address   = {New York},
  isbn      = {978-1-5386-5035-6},
  issn      = {2639-1589},
  doi       = {10.1109/BigData.2018.8622159},
  pages     = {3456--3463},
  year      = {2019},
  internal-note = {Key was a duplicate of Richly2019 (the trajectory data management survey); suffixed with b so both entries are citable.},
  abstract  = {For professional soccer clubs, performance and video analysis are an integral part of the preparation and post-processing of games. Coaches, scouts, and video analysts extract information about strengths and weaknesses of their team as well as opponents by manually analyzing video recordings of past games. Since video recordings are an unstructured data source, it is a complex and time-intensive task to find specific game situations and identify similar patterns. In this paper, we present a novel approach to detect patterns and situations (e.g., playmaking and ball passing of midfielders) based on trajectory data. The application uses the metaphor of a tactic board to offer a graphical query language.
With this interactive tactic board, the user can model a game situation or mark a specific situation in the video recording for which all matching occurrences in various games are immediately displayed, and the user can directly jump to the corresponding game scene. Through the additional visualization of key performance indicators (e.g., the physical load of the players), the user can get a better overall assessment of situations. With the capabilities to find specific game situations and complex patterns in video recordings, the interactive tactic board serves as a useful tool to improve the video analysis process of professional sports teams.},
  language  = {en}
}

@inproceedings{RepkeKrestelEddingetal.2018,
  author    = {Repke, Tim and Krestel, Ralf and Edding, Jakob and Hartmann, Moritz and Hering, Jonas and Kipping, Dennis and Schmidt, Hendrik and Scordialo, Nico and Zenner, Alexander},
  title     = {Beacon in the Dark},
  booktitle = {Proceedings of the 27th ACM International Conference on Information and Knowledge Management},
  publisher = {Association for Computing Machinery},
  address   = {New York},
  isbn      = {978-1-4503-6014-2},
  doi       = {10.1145/3269206.3269231},
  pages     = {1871--1874},
  year      = {2018},
  abstract  = {The large amount of heterogeneous data in these email corpora renders experts' investigations by hand infeasible. Auditors or journalists, e.g., who are looking for irregular or inappropriate content or suspicious patterns, are in desperate need for computer-aided exploration tools to support their investigations. We present our Beacon system for the exploration of such corpora at different levels of detail. A distributed processing pipeline combines text mining methods and social network analysis to augment the already semi-structured nature of emails. The user interface ties into the resulting cleaned and enriched dataset.
For the interface design we identify three objectives expert users have: gain an initial overview of the data to identify leads to investigate, understand the context of the information at hand, and have meaningful filters to iteratively focus onto a subset of emails. To this end we make use of interactive visualisations based on rearranged and aggregated extracted information to reveal salient patterns.},
  language  = {en}
}

@inproceedings{LosterNaumannEhmuelleretal.2018,
  author    = {Loster, Michael and Naumann, Felix and Ehmueller, Jan and Feldmann, Benjamin},
  title     = {{CurEx}},
  booktitle = {Proceedings of the 27th ACM International Conference on Information and Knowledge Management},
  publisher = {Association for Computing Machinery},
  address   = {New York},
  isbn      = {978-1-4503-6014-2},
  doi       = {10.1145/3269206.3269229},
  pages     = {1883--1886},
  year      = {2018},
  abstract  = {The integration of diverse structured and unstructured information sources into a unified, domain-specific knowledge base is an important task in many areas. A well-maintained knowledge base enables data analysis in complex scenarios, such as risk analysis in the financial sector or investigating large data leaks, such as the Paradise or Panama papers. Both the creation of such knowledge bases, as well as their continuous maintenance and curation involves many complex tasks and considerable manual effort. With CurEx, we present a modular system that allows structured and unstructured data sources to be integrated into a domain-specific knowledge base. In particular, we (i) enable the incremental improvement of each individual integration component; (ii) enable the selective generation of multiple knowledge graphs from the information contained in the knowledge base; and (iii) provide two distinct user interfaces tailored to the needs of data engineers and end-users respectively.
The former has curation capabilities and controls the integration process, whereas the latter focuses on the exploration of the generated knowledge graph.},
  language  = {en}
}

@inproceedings{Matthies2018,
  author    = {Matthies, Christoph},
  title     = {Scrum2kanban},
  booktitle = {Proceedings of the 2nd International Workshop on Software Engineering Education for Millennials},
  publisher = {IEEE},
  address   = {New York},
  isbn      = {978-1-4503-5750-0},
  doi       = {10.1145/3194779.3194784},
  pages     = {48--55},
  year      = {2018},
  internal-note = {ISBN re-hyphenated from 978-1-45035-750-0 (same digits) to match the 978-1-4503-... grouping used by the other ACM entries in this file. The DOI prefix 10.1145 is shared with the entries whose publisher is Association for Computing Machinery, but publisher here says IEEE; confirm.},
  abstract  = {Using university capstone courses to teach agile software development methodologies has become commonplace, as agile methods have gained support in professional software development. This usually means students are introduced to and work with the currently most popular agile methodology: Scrum. However, as the agile methods employed in the industry change and are adapted to different contexts, university courses must follow suit. A prime example of this is the Kanban method, which has recently gathered attention in the industry. In this paper, we describe a capstone course design, which adds the hands-on learning of the lean principles advocated by Kanban into a capstone project run with Scrum. This both ensures that students are aware of recent process frameworks and ideas as well as gain a more thorough overview of how agile methods can be employed in practice. We describe the details of the course and analyze the participating students' perceptions as well as our observations. We analyze the development artifacts, created by students during the course in respect to the two different development methodologies. We further present a summary of the lessons learned as well as recommendations for future similar courses.
The survey conducted at the end of the course revealed an overwhelmingly positive attitude of students towards the integration of Kanban into the course.},
  language  = {en}
}

@inproceedings{ArandaSchoelzelMendezetal.2018,
  author    = {Aranda, Juan and Sch{\"o}lzel, Mario and Mendez, Diego and Carrillo, Henry},
  title     = {An energy consumption model for multiModal wireless sensor networks based on wake-up radio receivers},
  booktitle = {2018 IEEE Colombian Conference on Communications and Computing (COLCOM)},
  publisher = {IEEE},
  address   = {New York},
  isbn      = {978-1-5386-6820-7},
  doi       = {10.1109/ColComCon.2018.8466728},
  pages     = {6},
  year      = {2018},
  internal-note = {pages = 6 looks like a page count rather than a page range; confirm against the publisher record.},
  abstract  = {Energy consumption is a major concern in Wireless Sensor Networks. A significant waste of energy occurs due to the idle listening and overhearing problems, which are typically avoided by turning off the radio, while no transmission is ongoing. The classical approach for allowing the reception of messages in such situations is to use a low-duty-cycle protocol, and to turn on the radio periodically, which reduces the idle listening problem, but requires timers and usually unnecessary wakeups. A better solution is to turn on the radio only on demand by using a Wake-up Radio Receiver (WuRx). In this paper, an energy model is presented to estimate the energy saving in various multi-hop network topologies under several use cases, when a WuRx is used instead of a classical low-duty-cycling protocol. The presented model also allows for estimating the benefit of various WuRx properties like using addressing or not.},
  language  = {en}
}

@incollection{MarufuKayemWolthusen2018,
  author    = {Marufu, Anesu M. C. and Kayem, Anne Voluntas dei Massah and Wolthusen, Stephen D.},
  title     = {The design and classification of cheating attacks on power marketing schemes in resource constrained smart micro-grids},
  booktitle = {Smart Micro-Grid Systems Security and Privacy},
  volume    = {71},
  publisher = {Springer},
  address   = {Dordrecht},
  isbn      = {978-3-319-91427-5},
  doi       = {10.1007/978-3-319-91427-5_6},
  pages     = {103--144},
  year      = {2018},
  abstract  = {In this chapter, we provide a framework to specify how cheating attacks can be conducted successfully on power marketing schemes in resource constrained smart micro-grids. This is an important problem because such cheating attacks can destabilise and in the worst case result in a breakdown of the micro-grid. We consider three aspects, in relation to modelling cheating attacks on power auctioning schemes. First, we aim to specify exactly how in spite of the resource constrained character of the micro-grid, cheating can be conducted successfully. Second, we consider how mitigations can be modelled to prevent cheating, and third, we discuss methods of maintaining grid stability and reliability even in the presence of cheating attacks. We use an Automated-Cheating-Attack (ACA) conception to build a taxonomy of cheating attacks based on the idea of adversarial acquisition of surplus energy. Adversarial acquisitions of surplus energy allow malicious users to pay less for access to more power than the quota allowed for the price paid. The impact on honest users, is the lack of an adequate supply of energy to meet power demand requests.
We conclude with a discussion of the performance overhead of provoking, detecting, and mitigating such attacks efficiently.},
  language  = {en}
}

@inproceedings{HaarmannBatoulisNikajetal.2018,
  author    = {Haarmann, Stephan and Batoulis, Kimon and Nikaj, Adriatik and Weske, Mathias},
  title     = {{DMN} Decision Execution on the {Ethereum} Blockchain},
  booktitle = {Advanced Information Systems Engineering, CAISE 2018},
  volume    = {10816},
  publisher = {Springer},
  address   = {Cham},
  isbn      = {978-3-319-91563-0},
  issn      = {0302-9743},
  doi       = {10.1007/978-3-319-91563-0_20},
  pages     = {327--341},
  year      = {2018},
  abstract  = {Recently blockchain technology has been introduced to execute interacting business processes in a secure and transparent way. While the foundations for process enactment on blockchain have been researched, the execution of decisions on blockchain has not been addressed yet. In this paper we argue that decisions are an essential aspect of interacting business processes, and, therefore, also need to be executed on blockchain. The immutable representation of decision logic can be used by the interacting processes, so that decision taking will be more secure, more transparent, and better auditable. The approach is based on a mapping of the DMN language S-FEEL to Solidity code to be run on the Ethereum blockchain.
The work is evaluated by a proof-of-concept prototype and an empirical cost evaluation.},
  language  = {en}
}

@inproceedings{GrossTiwariHammer2018,
  author    = {Gross, Sascha and Tiwari, Abhishek and Hammer, Christian},
  title     = {{PIAnalyzer}},
  booktitle = {Computer Security (ESORICS 2018), PT II},
  volume    = {11099},
  publisher = {Springer},
  address   = {Cham},
  isbn      = {978-3-319-98989-1},
  issn      = {0302-9743},
  doi       = {10.1007/978-3-319-98989-1_3},
  pages     = {41--59},
  year      = {2018},
  abstract  = {In this work we propose PIAnalyzer, a novel approach to analyze PendingIntent related vulnerabilities. We empirically evaluate PIAnalyzer on a set of 1000 randomly selected applications from the Google Play Store and find 1358 insecure usages of PendingIntents, including 70 severe vulnerabilities. We manually inspected ten reported vulnerabilities out of which nine correctly reported vulnerabilities, indicating a high precision. The evaluation shows that PIAnalyzer is efficient with an average execution time of 13 seconds per application.},
  language  = {en}
}

@inproceedings{GalkeGerstenkornScherp2018,
  author    = {Galke, Lukas and Gerstenkorn, Gunnar and Scherp, Ansgar},
  title     = {A case study of closed-domain response suggestion with limited training data},
  booktitle = {Database and Expert Systems Applications : DEXA 2018 International workshops},
  volume    = {903},
  publisher = {Springer},
  address   = {Berlin},
  isbn      = {978-3-319-99133-7},
  issn      = {1865-0929},
  doi       = {10.1007/978-3-319-99133-7_18},
  pages     = {218--229},
  year      = {2018},
  abstract  = {We analyze the problem of response suggestion in a closed domain along a real-world scenario of a digital library. We present a text-processing pipeline to generate question-answer pairs from chat transcripts.
On this limited amount of training data, we compare retrieval-based, conditioned-generation, and dedicated representation learning approaches for response suggestion. Our results show that retrieval-based methods that strive to find similar, known contexts are preferable over parametric approaches from the conditioned-generation family, when the training data is limited. We, however, identify a specific representation learning approach that is competitive to the retrieval-based approaches despite the training data limitation.},
  language  = {en}
}

@article{YousfiBatoulisWeske2019,
  author    = {Yousfi, Alaaeddine and Batoulis, Kimon and Weske, Mathias},
  title     = {Achieving Business Process Improvement via Ubiquitous Decision-Aware Business Processes},
  journal   = {ACM Transactions on Internet Technology},
  volume    = {19},
  number    = {1},
  publisher = {Association for Computing Machinery},
  address   = {New York},
  issn      = {1533-5399},
  doi       = {10.1145/3298986},
  pages     = {19},
  year      = {2019},
  internal-note = {pages = 19 looks like a page count rather than a page range; confirm against the publisher record.},
  abstract  = {Business process improvement is an endless challenge for many organizations. As long as there is a process, it must be improved. Nowadays, improvement initiatives are driven by professionals. This is no longer practical because people cannot perceive the enormous data of current business environments. Here, we introduce ubiquitous decision-aware business processes. They pervade the physical space, analyze the ever-changing environments, and make decisions accordingly. We explain how they can be built and used for improvement.
Our approach can be a valuable improvement option to alleviate the workload of participants by helping focus on the crucial rather than the menial tasks.},
  language  = {en}
}

@article{SchneiderMaximovaSakizloglouetal.2021,
  author    = {Schneider, Sven and Maximova, Maria and Sakizloglou, Lucas and Giese, Holger},
  title     = {Formal testing of timed graph transformation systems using metric temporal graph logic},
  journal   = {International journal on software tools for technology transfer},
  volume    = {23},
  number    = {3},
  publisher = {Springer},
  address   = {Heidelberg},
  issn      = {1433-2779},
  doi       = {10.1007/s10009-020-00585-w},
  pages     = {411--488},
  year      = {2021},
  abstract  = {Embedded real-time systems generate state sequences where time elapses between state changes. Ensuring that such systems adhere to a provided specification of admissible or desired behavior is essential. Formal model-based testing is often a suitable cost-effective approach. We introduce an extended version of the formalism of symbolic graphs, which encompasses types as well as attributes, for representing states of dynamic systems. Relying on this extension of symbolic graphs, we present a novel formalism of timed graph transformation systems (TGTSs) that supports the model-based development of dynamic real-time systems at an abstract level where possible state changes and delays are specified by graph transformation rules. We then introduce an extended form of the metric temporal graph logic (MTGL) with increased expressiveness to improve the applicability of MTGL for the specification of timed graph sequences generated by a TGTS. Based on the metric temporal operators of MTGL and its built-in graph binding mechanics, we express properties on the structure and attributes of graphs as well as on the occurrence of graphs over time that are related by their inner structure.
We provide formal support for checking whether a single generated timed graph sequence adheres to a provided MTGL specification. Relying on this logical foundation, we develop a testing framework for TGTSs that are specified using MTGL. Lastly, we apply this testing framework to a running example by using our prototypical implementation in the tool AutoGraph.},
  language  = {en}
}

@inproceedings{BrinkmannHeine2019,
  author    = {Brinkmann, Maik and Heine, Moreen},
  title     = {Can Blockchain Leverage for New Public Governance?},
  booktitle = {Proceedings of the 12th International Conference on Theory and Practice of Electronic Governance},
  publisher = {Association for Computing Machinery},
  address   = {New York},
  isbn      = {978-1-4503-6644-1},
  doi       = {10.1145/3326365.3326409},
  pages     = {338--341},
  year      = {2019},
  abstract  = {New Public Governance (NPG) as a paradigm for collaborative forms of public service delivery and Blockchain governance are trending topics for researchers and practitioners alike. Thus far, each topic has, on the whole, been discussed separately. This paper presents the preliminary results of ongoing research which aims to shed light on the more concrete benefits of Blockchain for the purpose of NPG. For the first time, a conceptual analysis is conducted on process level to spot benefits and limitations of Blockchain-based governance. Per process element, Blockchain key characteristics are mapped to functional aspects of NPG from a governance perspective. The preliminary results show that Blockchain offers valuable support for governments seeking methods to effectively coordinate co-producing networks. However, the extent of benefits of Blockchain varies across the process elements. It becomes evident that there is a need for off-chain processes.
It is, therefore, argued in favour of intensifying research on off-chain governance processes to better understand the implications for and influences on on-chain governance.}, language = {en} }

% Journal article. `series` (a copy of `journal`) is dropped, the page range uses `--`,
% and `address` keeps the city only, matching the other records in this file.
@article{LadleifWeske2021,
  author    = {Ladleif, Jan and Weske, Mathias},
  title     = {Which event happened first?},
  journal   = {Frontiers in blockchain},
  volume    = {4},
  pages     = {1--16},
  year      = {2021},
  publisher = {Frontiers in Blockchain},
  address   = {Lausanne},
  issn      = {2624-7852},
  doi       = {10.3389/fbloc.2021.758169},
  abstract  = {First come, first served: Critical choices between alternative actions are often made based on events external to an organization, and reacting promptly to their occurrence can be a major advantage over the competition. In Business Process Management (BPM), such deferred choices can be expressed in process models, and they are an important aspect of process engines. Blockchain-based process execution approaches are no exception to this, but are severely limited by the inherent properties of the platform: The isolated environment prevents direct access to external entities and data, and the non-continual runtime based entirely on atomic transactions impedes the monitoring and detection of events. In this paper we provide an in-depth examination of the semantics of deferred choice, and transfer them to environments such as the blockchain.
We introduce and compare several oracle architectures able to satisfy certain requirements, and show that they can be implemented using state-of-the-art blockchain technology.}, language = {en} }

% PhD thesis; identified by a DOI plus a URN resolver URL.
@phdthesis{Batoulis2019,
  author   = {Batoulis, Kimon},
  title    = {Sound integration of process and decision models},
  school   = {Universit{\"a}t Potsdam},
  year     = {2019},
  pages    = {xviii, 155},
  doi      = {10.25932/publishup-43738},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-437386},
  abstract = {Business process management is an established technique for business organizations to manage and support their processes. Those processes are typically represented by graphical models designed with modeling languages, such as the Business Process Model and Notation (BPMN). Since process models do not only serve the purpose of documentation but are also a basis for implementation and automation of the processes, they have to satisfy certain correctness requirements. In this regard, the notion of soundness of workflow nets was developed, that can be applied to BPMN process models in order to verify their correctness. Because the original soundness criteria are very restrictive regarding the behavior of the model, different variants of the soundness notion have been developed for situations in which certain violations are not even harmful. All of those notions do only consider the control-flow structure of a process model, however. This poses a problem, taking into account the fact that with the recent release and the ongoing development of the Decision Model and Notation (DMN) standard, an increasing number of process models are complemented by respective decision models. DMN is a dedicated modeling language for decision logic and separates the concerns of process and decision logic into two different models, process and decision models respectively.
Hence, this thesis is concerned with the development of decision-aware soundness notions, i.e., notions of soundness that build upon the original soundness ideas for process models, but additionally take into account complementary decision models. Similar to the various notions of workflow net soundness, this thesis investigates different notions of decision soundness that can be applied depending on the desired degree of restrictiveness. Since decision tables are a standardized means of DMN to represent decision logic, this thesis also puts special focus on decision tables, discussing how they can be translated into an unambiguous format and how their possible output values can be efficiently determined. Moreover, a prototypical implementation is described that supports checking a basic version of decision soundness. The decision soundness notions were also empirically evaluated on models from participants of an online course on process and decision modeling as well as from a process management project of a large insurance company. The evaluation demonstrates that violations of decision soundness indeed occur and can be detected with our approach.}, language = {en} }

% Chapter in an edited book (it carries `editor` and `isbn`): retyped from article to
% incollection. The book title, previously duplicated in `series` and `journal`,
% now lives in `booktitle`.
@incollection{RischKrestel2020,
  author    = {Risch, Julian and Krestel, Ralf},
  title     = {Toxic comment detection in online discussions},
  booktitle = {Deep learning-based approaches for sentiment analysis},
  editor    = {Agarwal, Basant and Nayak, Richi and Mittal, Namita and Patnaik, Srikanta},
  pages     = {85--109},
  year      = {2020},
  publisher = {Springer},
  address   = {Singapore},
  isbn      = {978-981-15-1216-2},
  issn      = {2524-7565},
  doi       = {10.1007/978-981-15-1216-2_4},
  abstract  = {Comment sections of online news platforms are an essential space to express opinions and discuss political topics. In contrast to other online posts, news discussions are related to particular news articles, comments refer to each other, and individual conversations emerge.
However, the misuse by spammers, haters, and trolls makes costly content moderation necessary. Sentiment analysis can not only support moderation but also help to understand the dynamics of online discussions. A subtask of content moderation is the identification of toxic comments. To this end, we describe the concept of toxicity and characterize its subclasses. Further, we present various deep learning approaches, including datasets and architectures, tailored to sentiment analysis in online discussions. One way to make these approaches more comprehensible and trustworthy is fine-grained instead of binary comment classification. On the downside, more classes require more training data. Therefore, we propose to augment training data by using transfer learning. We discuss real-world applications, such as semi-automated comment moderation and troll detection. Finally, we outline future challenges and current limitations in light of most recent research publications.}, language = {en} }

% Journal article. `series` (a copy of `journal`) is dropped; the page range uses `--`.
@article{DombrowskiErmakovaFabian2019,
  author    = {Dombrowski, Sebastian and Ermakova, Tatiana and Fabian, Benjamin},
  title     = {Graph-based analysis of cloud connectivity at the internet protocol level},
  journal   = {International Journal of Communication Networks and Distributed Systems (IJCNDS)},
  volume    = {23},
  number    = {1},
  pages     = {117--142},
  year      = {2019},
  publisher = {Inderscience Enterprises Ltd},
  address   = {Geneva},
  issn      = {1754-3916},
  doi       = {10.1504/IJCNDS.2019.100644},
  abstract  = {Internet connectivity of cloud services is of exceptional importance for both their providers and consumers. This article demonstrates the outlines of a method for measuring cloud-service connectivity at the internet protocol level from a client's perspective. For this, we actively collect connectivity data via traceroute measurements from PlanetLab to several major cloud services.
Furthermore, we construct graph models from the collected data, and analyse the connectivity of the services based on important graph-based measures. Then, random and targeted node removal attacks are simulated, and the corresponding vulnerability of cloud services is evaluated. Our results indicate that cloud service hosts are, on average, much better connected than average hosts. However, when interconnecting nodes are removed in a targeted manner, cloud connectivity is dramatically reduced.}, language = {en} }

% Conference paper: retyped from misc to inproceedings. The proceedings title,
% previously duplicated in `series` and `journal`, now lives in `booktitle`.
@inproceedings{BlaesiusEubeFeldtkelleretal.2018,
  author    = {Blaesius, Thomas and Eube, Jan and Feldtkeller, Thomas and Friedrich, Tobias and Krejca, Martin Stefan and Lagodzinski, Gregor J. A. and Rothenberger, Ralf and Severin, Julius and Sommer, Fabian and Trautmann, Justin},
  title     = {Memory-restricted Routing With Tiled Map Data},
  booktitle = {2018 IEEE International Conference on Systems, Man, and Cybernetics (SMC)},
  pages     = {3347--3354},
  year      = {2018},
  publisher = {IEEE},
  address   = {New York},
  isbn      = {978-1-5386-6650-0},
  issn      = {1062-922X},
  doi       = {10.1109/SMC.2018.00567},
  abstract  = {Modern routing algorithms reduce query time by depending heavily on preprocessed data. The recently developed Navigation Data Standard (NDS) enforces a separation between algorithms and map data, rendering preprocessing inapplicable. Furthermore, map data is partitioned into tiles with respect to their geographic coordinates. With the limited memory found in portable devices, the number of tiles loaded becomes the major factor for run time. We study routing under these restrictions and present new algorithms as well as empirical evaluations.
Our results show that, on average, the most efficient algorithm presented uses more than 20 times fewer tile loads than a normal A*.}, language = {en} }

% Journal article. `series` (a copy of `journal`) is dropped; the page range uses `--`.
@article{JiangNaumann2020,
  author    = {Jiang, Lan and Naumann, Felix},
  title     = {Holistic primary key and foreign key detection},
  journal   = {Journal of intelligent information systems : JIIS},
  volume    = {54},
  number    = {3},
  pages     = {439--461},
  year      = {2020},
  publisher = {Springer},
  address   = {Dordrecht},
  issn      = {0925-9902},
  doi       = {10.1007/s10844-019-00562-z},
  abstract  = {Primary keys (PKs) and foreign keys (FKs) are important elements of relational schemata in various applications, such as query optimization and data integration. However, in many cases, these constraints are unknown or not documented. Detecting them manually is time-consuming and even infeasible in large-scale datasets. We study the problem of discovering primary keys and foreign keys automatically and propose an algorithm to detect both, namely Holistic Primary Key and Foreign Key Detection (HoPF). PKs and FKs are subsets of the sets of unique column combinations (UCCs) and inclusion dependencies (INDs), respectively, for which efficient discovery algorithms are known. Using score functions, our approach is able to effectively extract the true PKs and FKs from the vast sets of valid UCCs and INDs. Several pruning rules are employed to speed up the procedure. We evaluate precision and recall on three benchmarks and two real-world datasets. The results show that our method is able to retrieve on average 88\% of all primary keys, and 91\% of all foreign keys.
We compare the performance of HoPF with two baseline approaches that both assume the existence of primary keys.}, language = {en} }

% Journal article. `series` (a copy of `journal`) is dropped, and the acronym in the
% title is brace-protected so sentence-casing styles cannot lowercase it.
% NOTE(review): `pages = {15}` looks like a page count rather than a page or
% article number; confirm against the publisher record before relying on it.
@article{KonakWegnerArnrich2020,
  author    = {Konak, Orhan and Wegner, Pit and Arnrich, Bert},
  title     = {{IMU}-Based Movement Trajectory Heatmaps for Human Activity Recognition},
  journal   = {Sensors},
  volume    = {20},
  number    = {24},
  pages     = {15},
  year      = {2020},
  publisher = {MDPI},
  address   = {Basel},
  issn      = {1424-8220},
  doi       = {10.3390/s20247179},
  abstract  = {Recent trends in ubiquitous computing have led to a proliferation of studies that focus on human activity recognition (HAR) utilizing inertial sensor data that consist of acceleration, orientation and angular velocity. However, the performances of such approaches are limited by the amount of annotated training data, especially in fields where annotating data is highly time-consuming and requires specialized professionals, such as in healthcare. In image classification, this limitation has been mitigated by powerful oversampling techniques such as data augmentation. Using this technique, this work evaluates to what extent transforming inertial sensor data into movement trajectories and into 2D heatmap images can be advantageous for HAR when data are scarce. A convolutional long short-term memory (ConvLSTM) network that incorporates spatiotemporal correlations was used to classify the heatmap images. Evaluation was carried out on Deep Inertial Poser (DIP), a known dataset composed of inertial sensor data. The results obtained suggest that for datasets with large numbers of subjects, using state-of-the-art methods remains the best alternative. However, a performance advantage was achieved for small datasets, which is usually the case in healthcare.
Moreover, movement trajectories provide a visual representation of human activities, which can help researchers to better interpret and analyze motion patterns.}, language = {en} }

% PhD thesis; only a URN resolver URL is recorded (no DOI field).
@phdthesis{Zuo2017,
  author   = {Zuo, Zhe},
  title    = {From unstructured to structured: Context-based named entity mining from text},
  school   = {Universit{\"a}t Potsdam},
  year     = {2017},
  pages    = {vii, 112},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-412576},
  abstract = {With recent advances in the area of information extraction, automatically extracting structured information from a vast amount of unstructured textual data becomes an important task, which is infeasible for humans to capture all information manually. Named entities (e.g., persons, organizations, and locations), which are crucial components in texts, are usually the subjects of structured information from textual documents. Therefore, the task of named entity mining receives much attention. It consists of three major subtasks, which are named entity recognition, named entity linking, and relation extraction. These three tasks build up an entire pipeline of a named entity mining system, where each of them has its challenges and can be employed for further applications. As a fundamental task in the natural language processing domain, studies on named entity recognition have a long history, and many existing approaches produce reliable results. The task is aiming to extract mentions of named entities in text and identify their types. Named entity linking recently received much attention with the development of knowledge bases that contain rich information about entities. The goal is to disambiguate mentions of named entities and to link them to the corresponding entries in a knowledge base. Relation extraction, as the final step of named entity mining, is a highly challenging task, which is to extract semantic relations between named entities, e.g., the ownership relation between two companies.
In this thesis, we review the state-of-the-art of named entity mining domain in detail, including valuable features, techniques, evaluation methodologies, and so on. Furthermore, we present two of our approaches that focus on the named entity linking and relation extraction tasks separately. To solve the named entity linking task, we propose the entity linking technique, BEL, which operates on a textual range of relevant terms and aggregates decisions from an ensemble of simple classifiers. Each of the classifiers operates on a randomly sampled subset of the above range. In extensive experiments on hand-labeled and benchmark datasets, our approach outperformed state-of-the-art entity linking techniques, both in terms of quality and efficiency. For the task of relation extraction, we focus on extracting a specific group of difficult relation types, business relations between companies. These relations can be used to gain valuable insight into the interactions between companies and perform complex analytics, such as predicting risk or valuating companies. Our semi-supervised strategy can extract business relations between companies based on only a few user-provided seed company pairs. By doing so, we also provide a solution for the problem of determining the direction of asymmetric relations, such as the ownership\_of relation. We improve the reliability of the extraction process by using a holistic pattern identification method, which classifies the generated extraction patterns.
Our experiments show that we can accurately and reliably extract new entity pairs occurring in the target relation by using as few as five labeled seed pairs.}, language = {en} }

% PhD thesis; only a URN resolver URL is recorded (no DOI field).
% The title's spaced hyphen is written as `--` so it typesets as a dash.
@phdthesis{Papenbrock2017,
  author   = {Papenbrock, Thorsten},
  title    = {Data profiling -- efficient discovery of dependencies},
  school   = {Universit{\"a}t Potsdam},
  year     = {2017},
  pages    = {viii, ii, 141},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-406705},
  abstract = {Data profiling is the computer science discipline of analyzing a given dataset for its metadata. The types of metadata range from basic statistics, such as tuple counts, column aggregations, and value distributions, to much more complex structures, in particular inclusion dependencies (INDs), unique column combinations (UCCs), and functional dependencies (FDs). If present, these statistics and structures serve to efficiently store, query, change, and understand the data. Most datasets, however, do not provide their metadata explicitly so that data scientists need to profile them. While basic statistics are relatively easy to calculate, more complex structures present difficult, mostly NP-complete discovery tasks; even with good domain knowledge, it is hardly possible to detect them manually. Therefore, various profiling algorithms have been developed to automate the discovery. None of them, however, can process datasets of typical real-world size, because their resource consumptions and/or execution times exceed effective limits. In this thesis, we propose novel profiling algorithms that automatically discover the three most popular types of complex metadata, namely INDs, UCCs, and FDs, which all describe different kinds of key dependencies. The task is to extract all valid occurrences from a given relational instance.
The three algorithms build upon known techniques from related work and complement them with algorithmic paradigms, such as divide \& conquer, hybrid search, progressivity, memory sensitivity, parallelization, and additional pruning to greatly improve upon current limitations. Our experiments show that the proposed algorithms are orders of magnitude faster than related work. They are, in particular, now able to process datasets of real-world, i.e., multiple gigabytes size with reasonable memory and time consumption. Due to the importance of data profiling in practice, industry has built various profiling tools to support data scientists in their quest for metadata. These tools provide good support for basic statistics and they are also able to validate individual dependencies, but they lack real discovery features even though some fundamental discovery techniques are known for more than 15 years. To close this gap, we developed Metanome, an extensible profiling platform that incorporates not only our own algorithms but also many further algorithms from other researchers. With Metanome, we make our research accessible to all data scientists and IT-professionals that are tasked with data profiling. Besides the actual metadata discovery, the platform also offers support for the ranking and visualization of metadata result sets. Being able to discover the entire set of syntactically valid metadata naturally introduces the subsequent task of extracting only the semantically meaningful parts. This is a challenge, because the complete metadata results are surprisingly large (sometimes larger than the dataset itself) and judging their use case dependent semantic relevance is difficult.
To show that the completeness of these metadata sets is extremely valuable for their usage, we finally exemplify the efficient processing and effective assessment of functional dependencies for the use case of schema normalization.}, language = {en} }

% PhD thesis; only a URN resolver URL is recorded (no DOI field).
@phdthesis{Zieger2017,
  author   = {Zieger, Tobias},
  title    = {Self-adaptive data quality},
  school   = {Universit{\"a}t Potsdam},
  year     = {2017},
  pages    = {vii, 125},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-410573},
  abstract = {Carrying out business processes successfully is closely linked to the quality of the data inventory in an organization. Lacks in data quality lead to problems: Incorrect address data prevents (timely) shipments to customers. Erroneous orders lead to returns and thus to unnecessary effort. Wrong pricing forces companies to miss out on revenues or to impair customer satisfaction. If orders or customer records cannot be retrieved, complaint management takes longer. Due to erroneous inventories, too few or too many supplies might be reordered. A special problem with data quality and the reason for many of the issues mentioned above are duplicates in databases. Duplicates are different representations of the same real-world objects in a dataset. However, these representations differ from each other and are for that reason hard to match by a computer. Moreover, the number of required comparisons to find those duplicates grows with the square of the dataset size. To cleanse the data, these duplicates must be detected and removed. Duplicate detection is a very laborious process. To achieve satisfactory results, appropriate software must be created and configured (similarity measures, partitioning keys, thresholds, etc.). Both require much manual effort and experience. This thesis addresses automation of parameter selection for duplicate detection and presents several novel approaches that eliminate the need for human experience in parts of the duplicate detection process.
A pre-processing step is introduced that analyzes the datasets in question and classifies their attributes semantically. Not only do these annotations help understanding the respective datasets, but they also facilitate subsequent steps, for example, by selecting appropriate similarity measures or normalizing the data upfront. This approach works without schema information. Following that, we show a partitioning technique that strongly reduces the number of pair comparisons for the duplicate detection process. The approach automatically finds particularly suitable partitioning keys that simultaneously allow for effective and efficient duplicate retrieval. By means of a user study, we demonstrate that this technique finds partitioning keys that outperform expert suggestions and additionally does not need manual configuration. Furthermore, this approach can be applied independently of the attribute types. To measure the success of a duplicate detection process and to execute the described partitioning approach, a gold standard is required that provides information about the actual duplicates in a training dataset. This thesis presents a technique that uses existing duplicate detection results and crowdsourcing to create a near gold standard that can be used for the purposes above. Another part of the thesis describes and evaluates strategies how to reduce these crowdsourcing costs and to achieve a consensus with less effort.}, language = {en} }

% PhD thesis; only a URN resolver URL is recorded (no DOI field).
@phdthesis{Vogel2018,
  author   = {Vogel, Thomas},
  title    = {Model-driven engineering of self-adaptive software},
  school   = {Universit{\"a}t Potsdam},
  year     = {2018},
  pages    = {xvi, 357},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-409755},
  abstract = {The development of self-adaptive software requires the engineering of an adaptation engine that controls the underlying adaptable software by a feedback loop.
State-of-the-art approaches prescribe the feedback loop in terms of numbers, how the activities (e.g., monitor, analyze, plan, and execute (MAPE)) and the knowledge are structured to a feedback loop, and the type of knowledge. Moreover, the feedback loop is usually hidden in the implementation or framework and therefore not visible in the architectural design. Additionally, an adaptation engine often employs runtime models that either represent the adaptable software or capture strategic knowledge such as reconfiguration strategies. State-of-the-art approaches do not systematically address the interplay of such runtime models, which would otherwise allow developers to freely design the entire feedback loop. This thesis presents ExecUtable RuntimE MegAmodels (EUREMA), an integrated model-driven engineering (MDE) solution that rigorously uses models for engineering feedback loops. EUREMA provides a domain-specific modeling language to specify and an interpreter to execute feedback loops. The language allows developers to freely design a feedback loop concerning the activities and runtime models (knowledge) as well as the number of feedback loops. It further supports structuring the feedback loops in the adaptation engine that follows a layered architectural style. Thus, EUREMA makes the feedback loops explicit in the design and enables developers to reason about design decisions. To address the interplay of runtime models, we propose the concept of a runtime megamodel, which is a runtime model that contains other runtime models as well as activities (e.g., MAPE) working on the contained models. This concept is the underlying principle of EUREMA. The resulting EUREMA (mega)models are kept alive at runtime and they are directly executed by the EUREMA interpreter to run the feedback loops. Interpretation provides the flexibility to dynamically adapt a feedback loop. 
In this context, EUREMA supports engineering self-adaptive software in which feedback loops run independently or in a coordinated fashion within the same layer as well as on top of each other in different layers of the adaptation engine. Moreover, we consider preliminary means to evolve self-adaptive software by providing a maintenance interface to the adaptation engine. This thesis discusses in detail EUREMA by applying it to different scenarios such as single, multiple, and stacked feedback loops for self-repairing and self-optimizing the mRUBiS application. Moreover, it investigates the design and expressiveness of EUREMA, reports on experiments with a running system (mRUBiS) and with alternative solutions, and assesses EUREMA with respect to quality attributes such as performance and scalability. The conducted evaluation provides evidence that EUREMA as an integrated and open MDE approach for engineering self-adaptive software seamlessly integrates the development and runtime environments using the same formalism to specify and execute feedback loops, supports the dynamic adaptation of feedback loops in layered architectures, and achieves an efficient execution of feedback loops by leveraging incrementality.}, language = {en} }

% PhD thesis; a URN resolver URL is recorded, but no DOI and no `pages` field.
@phdthesis{Bazhenova2018,
  author   = {Bazhenova, Ekaterina},
  title    = {Discovery of Decision Models Complementary to Process Models},
  school   = {Universit{\"a}t Potsdam},
  year     = {2018},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-410020},
  abstract = {Business process management is an acknowledged asset for running an organization in a productive and sustainable way. One of the most important aspects of business process management, occurring on a daily basis at all levels, is decision making. In recent years, a number of decision management frameworks have appeared in addition to existing business process management systems.
More recently, Decision Model and Notation (DMN) was developed by the OMG consortium with the aim of complementing the widely used Business Process Model and Notation (BPMN). One of the reasons for the emergence of DMN is the increasing interest in the evolving paradigm known as the separation of concerns. This paradigm states that modeling decisions complementary to processes reduces process complexity by externalizing decision logic from process models and importing it into a dedicated decision model. Such an approach increases the agility of model design and execution. This provides organizations with the flexibility to adapt to the ever increasing rapid and dynamic changes in the business ecosystem. The research gap, identified by us, is that the separation of concerns, recommended by DMN, prescribes the externalization of the decision logic of process models in one or more separate decision models, but it does not specify how this can be achieved. The goal of this thesis is to overcome the presented gap by developing a framework for discovering decision models in a semi-automated way from information about existing process decision making. Thus, in this thesis we develop methodologies to extract decision models from: (1) control flow and data of process models that exist in enterprises; and (2) from event logs recorded by enterprise information systems, encapsulating day-to-day operations. Furthermore, we provide an extension of the methodologies to discover decision models from event logs enriched with fuzziness, a tool dealing with partial knowledge of the process execution information. All the proposed techniques are implemented and evaluated in case studies using real-life and synthetic process models and event logs. The evaluation of these case studies shows that the proposed methodologies provide valid and accurate output decision models that can serve as blueprints for executing decisions complementary to process models.
Thus, these methodologies have applicability in the real world and they can be used, for example, for compliance checks, among other uses, which could improve the organization's decision making and hence its overall performance.}, language = {en} }