@article{BirnickBlaesiusFriedrichetal.2020, author = {Birnick, Johann and Bl{\"a}sius, Thomas and Friedrich, Tobias and Naumann, Felix and Papenbrock, Thorsten and Schirneck, Friedrich Martin}, title = {Hitting set enumeration with partial information for unique column combination discovery}, series = {Proceedings of the VLDB Endowment}, volume = {13}, journal = {Proceedings of the VLDB Endowment}, number = {11}, publisher = {Association for Computing Machinery}, address = {[New York, NY]}, issn = {2150-8097}, doi = {10.14778/3407790.3407824}, pages = {2270 -- 2283}, year = {2020}, abstract = {Unique column combinations (UCCs) are a fundamental concept in relational databases. They identify entities in the data and support various data management activities. Still, UCCs are usually not explicitly defined and need to be discovered. State-of-the-art data profiling algorithms are able to efficiently discover UCCs in moderately sized datasets, but they tend to fail on large and, in particular, on wide datasets due to run time and memory limitations.
In this paper, we introduce HPIValid, a novel UCC discovery algorithm that implements a faster and more resource-saving search strategy. HPIValid models the metadata discovery as a hitting set enumeration problem in hypergraphs. In this way, it combines efficient discovery techniques from data profiling research with the most recent theoretical insights into enumeration algorithms. Our evaluation shows that HPIValid is not only orders of magnitude faster than related work, it also has a much smaller memory footprint.}, language = {en} } @article{ShiSchirneckFriedrichetal.2018, author = {Shi, Feng and Schirneck, Friedrich Martin and Friedrich, Tobias and K{\"o}tzing, Timo and Neumann, Frank}, title = {Reoptimization time analysis of evolutionary algorithms on linear functions under dynamic uniform constraints}, series = {Algorithmica : an international journal in computer science}, volume = {82}, journal = {Algorithmica : an international journal in computer science}, number = {10}, publisher = {Springer}, address = {New York}, issn = {0178-4617}, doi = {10.1007/s00453-020-00739-x}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-605295}, pages = {3117 -- 3123}, year = {2018}, abstract = {Rigorous runtime analysis is a major approach towards understanding evolutionary computing techniques, and in this area linear pseudo-Boolean objective functions play a central role. Having an additional linear constraint is then equivalent to the NP-hard Knapsack problem, certain classes thereof have been studied in recent works. In this article, we present a dynamic model of optimizing linear functions under uniform constraints. Starting from an optimal solution with respect to a given constraint bound, we investigate the runtimes that different evolutionary algorithms need to recompute an optimal solution when the constraint bound changes by a certain amount. The classical (1+1) EA and several population-based algorithms are designed for that purpose, and are shown to recompute efficiently. Furthermore, a variant of the (1+(λ,λ))GA for the dynamic optimization problem is studied, whose performance is better when the change of the constraint bound is small.}, language = {en} } @article{BlaesiusFriedrichSchirneck2021, author = {Blaesius, Thomas and Friedrich, Tobias and Schirneck, Friedrich Martin}, title = {The complexity of dependency detection and discovery in relational databases}, series = {Theoretical computer science}, volume = {900}, journal = {Theoretical computer science}, publisher = {Elsevier}, address = {Amsterdam}, issn = {0304-3975}, doi = {10.1016/j.tcs.2021.11.020}, pages = {79 -- 96}, year = {2021}, abstract = {Multi-column dependencies in relational databases come associated with two different computational tasks. The detection problem is to decide whether a dependency of a certain type and size holds in a given database, the discovery problem asks to enumerate all valid dependencies of that type. We settle the complexity of both of these problems for unique column combinations (UCCs), functional dependencies (FDs), and inclusion dependencies (INDs). We show that the detection of UCCs and FDs is W[2]-complete when parameterized by the solution size. The discovery of inclusion-wise minimal UCCs is proven to be equivalent under parsimonious reductions to the transversal hypergraph problem of enumerating the minimal hitting sets of a hypergraph. The discovery of FDs is equivalent to the simultaneous enumeration of the hitting sets of multiple input hypergraphs. We further identify the detection of INDs as one of the first natural W[3]-complete problems. The discovery of maximal INDs is shown to be equivalent to enumerating the maximal satisfying assignments of antimonotone, 3-normalized Boolean formulas.}, language = {en} } @article{BlaesiusFriedrichLischeidetal.2022, author = {Bl{\"a}sius, Thomas and Friedrich, Tobias and Lischeid, Julius and Meeks, Kitty and Schirneck, Friedrich Martin}, title = {Efficiently enumerating hitting sets of hypergraphs arising in data profiling}, series = {Journal of computer and system sciences : JCSS}, volume = {124}, journal = {Journal of computer and system sciences : JCSS}, publisher = {Elsevier}, address = {San Diego}, issn = {0022-0000}, doi = {10.1016/j.jcss.2021.10.002}, pages = {192 -- 213}, year = {2022}, abstract = {The transversal hypergraph problem asks to enumerate the minimal hitting sets of a hypergraph. If the solutions have bounded size, Eiter and Gottlob [SICOMP'95] gave an algorithm running in output-polynomial time, but whose space requirement also scales with the output. We improve this to polynomial delay and space. Central to our approach is the extension problem, deciding for a set X of vertices whether it is contained in any minimal hitting set. We show that this is one of the first natural problems to be W[3]-complete. We give an algorithm for the extension problem running in time O(m(vertical bar X vertical bar+1) n) and prove a SETH-lower bound showing that this is close to optimal. We apply our enumeration method to the discovery problem of minimal unique column combinations from data profiling. Our empirical evaluation suggests that the algorithm outperforms its worst-case guarantees on hypergraphs stemming from real-world databases.}, language = {en} } @techreport{DoellnerFriedrichArnrichetal.2022, author = {D{\"o}llner, J{\"u}rgen Roland Friedrich and Friedrich, Tobias and Arnrich, Bert and Hirschfeld, Robert and Lippert, Christoph and Meinel, Christoph}, title = {Abschlussbericht KI-Labor ITSE}, doi = {10.25932/publishup-57860}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-578604}, pages = {60}, year = {2022}, abstract = {Der Abschlussbericht beschreibt Aufgaben und Ergebnisse des KI-Labors "ITSE". Gegenstand des KI-Labors bildeten Methodik, Technik und Ausbildung in der IT-Systemtechnik zur Analyse, Planung und Konstruktion KI-basierter, komplexer IT-Systeme.}, language = {de} } @book{MeinelDoellnerWeskeetal.2021, author = {Meinel, Christoph and D{\"o}llner, J{\"u}rgen Roland Friedrich and Weske, Mathias and Polze, Andreas and Hirschfeld, Robert and Naumann, Felix and Giese, Holger and Baudisch, Patrick and Friedrich, Tobias and B{\"o}ttinger, Erwin and Lippert, Christoph and D{\"o}rr, Christian and Lehmann, Anja and Renard, Bernhard and Rabl, Tilmann and Uebernickel, Falk and Arnrich, Bert and H{\"o}lzle, Katharina}, title = {Proceedings of the HPI Research School on Service-oriented Systems Engineering 2020 Fall Retreat}, number = {138}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-513-2}, issn = {1613-5652}, doi = {10.25932/publishup-50413}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-504132}, publisher = {Universit{\"a}t Potsdam}, pages = {vi, 144}, year = {2021}, abstract = {Design and Implementation of service-oriented architectures imposes a huge number of research questions from the fields of software engineering, system analysis and modeling, adaptability, and application integration. Component orientation and web services are two approaches for design and realization of complex web-based system. Both approaches allow for dynamic application adaptation as well as integration of enterprise application. Service-Oriented Systems Engineering represents a symbiosis of best practices in object-orientation, component-based development, distributed computing, and business process management. It provides integration of business and IT concerns. The annual Ph.D. Retreat of the Research School provides each member the opportunity to present his/her current state of their research and to give an outline of a prospective Ph.D. thesis. Due to the interdisciplinary structure of the research school, this technical report covers a wide range of topics. These include but are not limited to: Human Computer Interaction and Computer Vision as Service; Service-oriented Geovisualization Systems; Algorithm Engineering for Service-oriented Systems; Modeling and Verification of Self-adaptive Service-oriented Systems; Tools and Methods for Software Engineering in Service-oriented Systems; Security Engineering of Service-based IT Systems; Service-oriented Information Systems; Evolutionary Transition of Enterprise Applications to Service Orientation; Operating System Abstractions for Service-oriented Computing; and Services Specification, Composition, and Enactment.}, language = {en} } @article{FriedrichKrejcaRothenbergeretal.2019, author = {Friedrich, Tobias and Krejca, Martin Stefan and Rothenberger, Ralf and Arndt, Tobias and Hafner, Danijar and Kellermeier, Thomas and Krogmann, Simon and Razmjou, Armin}, title = {Routing for on-street parking search using probabilistic data}, series = {AI communications : AICOM ; the European journal on artificial intelligence}, volume = {32}, journal = {AI communications : AICOM ; the European journal on artificial intelligence}, number = {2}, publisher = {IOS Press}, address = {Amsterdam}, issn = {0921-7126}, doi = {10.3233/AIC-180574}, pages = {113 -- 124}, year = {2019}, abstract = {A significant percentage of urban traffic is caused by the search for parking spots. One possible approach to improve this situation is to guide drivers along routes which are likely to have free parking spots. The task of finding such a route can be modeled as a probabilistic graph problem which is NP-complete. Thus, we propose heuristic approaches for solving this problem and evaluate them experimentally. For this, we use probabilities of finding a parking spot, which are based on publicly available empirical data from TomTom International B.V. Additionally, we propose a heuristic that relies exclusively on conventional road attributes. Our experiments show that this algorithm comes close to the baseline by a factor of 1.3 in our cost measure. Last, we complement our experiments with results from a field study, comparing the success rates of our algorithms against real human drivers.}, language = {en} } @article{FriedrichKoetzingKrejcaetal.2016, author = {Friedrich, Tobias and K{\"o}tzing, Timo and Krejca, Martin Stefan and Sutton, Andrew M.}, title = {Robustness of Ant Colony Optimization to Noise}, series = {Evolutionary computation}, volume = {24}, journal = {Evolutionary computation}, publisher = {MIT Press}, address = {Cambridge}, issn = {1063-6560}, doi = {10.1162/EVCO_a_00178}, pages = {237 -- 254}, year = {2016}, abstract = {Recently, ant colony optimization (ACO) algorithms have proven to be efficient in uncertain environments, such as noisy or dynamically changing fitness functions. Most of these analyses have focused on combinatorial problems such as path finding. We rigorously analyze an ACO algorithm optimizing linear pseudo- Boolean functions under additive posterior noise. We study noise distributions whose tails decay exponentially fast, including the classical case of additive Gaussian noise. Without noise, the classical (mu + 1) EA outperforms any ACO algorithm, with smaller mu being better; however, in the case of large noise, the (mu + 1) EA fails, even for high values of mu (which are known to help against small noise). In this article, we show that ACO is able to deal with arbitrarily large noise in a graceful manner; that is, as long as the evaporation factor. is small enough, dependent on the variance s2 of the noise and the dimension n of the search space, optimization will be successful. We also briefly consider the case of prior noise and prove that ACO can also efficiently optimize linear functions under this noise model.}, language = {en} } @article{FriedrichKoetzingKrejca2019, author = {Friedrich, Tobias and K{\"o}tzing, Timo and Krejca, Martin Stefan}, title = {Unbiasedness of estimation-of-distribution algorithms}, series = {Theoretical computer science}, volume = {785}, journal = {Theoretical computer science}, publisher = {Elsevier}, address = {Amsterdam}, issn = {0304-3975}, doi = {10.1016/j.tcs.2018.11.001}, pages = {46 -- 59}, year = {2019}, abstract = {In the context of black-box optimization, black-box complexity is used for understanding the inherent difficulty of a given optimization problem. Central to our understanding of nature-inspired search heuristics in this context is the notion of unbiasedness. Specialized black-box complexities have been developed in order to better understand the limitations of these heuristics - especially of (population-based) evolutionary algorithms (EAs). In contrast to this, we focus on a model for algorithms explicitly maintaining a probability distribution over the search space: so-called estimation-of-distribution algorithms (EDAs). We consider the recently introduced n-Bernoulli-lambda-EDA framework, which subsumes, for example, the commonly known EDAs PBIL, UMDA, lambda-MMAS(IB), and cGA. We show that an n-Bernoulli-lambda-EDA is unbiased if and only if its probability distribution satisfies a certain invariance property under isometric automorphisms of [0, 1](n). By restricting how an n-Bernoulli-lambda-EDA can perform an update, in a way common to many examples, we derive conciser characterizations, which are easy to verify. We demonstrate this by showing that our examples above are all unbiased. (C) 2018 Elsevier B.V. All rights reserved.}, language = {en} } @article{RiechelmanFohlmeisterKlugeetal.2019, author = {Riechelman, Dana F. C. and Fohlmeister, Jens Bernd and Kluge, Tobias and Jochum, Klaus Peter and Richter, Detlev K. and Deininger, Michael and Friedrich, Ronny and Frank, Norbert and Scholz, Denis}, title = {Evaluating the potential of tree-ring methodology for cross-dating of three annually laminated stalagmites from Zoolithencave (SE Germany)}, series = {Quaternary geochronology : the international research and review journal on advances in quaternary dating techniques}, volume = {52}, journal = {Quaternary geochronology : the international research and review journal on advances in quaternary dating techniques}, publisher = {Elsevier}, address = {Oxford}, issn = {1871-1014}, doi = {10.1016/j.quageo.2019.04.001}, pages = {37 -- 50}, year = {2019}, abstract = {Three small stalagmites from Zoolithencave (southern Germany) show visible laminae, which consist of a clear and a brownish, pigmented layer pair. This potentially provides the opportunity to construct precise chronologies by counting annual laminae. The growth period of the three stalagmites was constrained by the C-14 bomb peak in the youngest part of all three stalagmites and C-14-dating of a piece of charcoal in the consolidated base part of stalagmite Zoo-rez-2. These data suggest an age of AD 1970 for the top laminae and a lower age limit of AD 1973-1682 or AD 1735-1778. Laminae were counted and their thickness determined on scanned thin sections of all stalagmites. On stalagmites Zoo-rez-1 and -2, three tracks were measured near the growth axes, each separated into three sections at prominent anchor laminae (I, II, III). Each section was replicated three times (a, b, c). For Zoo-rez-3, only one track was measured. The total number of laminae counted for Zoo-rez-1 ranges from 138 to 177, for Zoo-rez-2 from 119 to 145, and for Zoo-rez-3 from 159 to 166. The numbers agree well with the range constrained by the bomb peak and the age of the charcoal, which supports the annual origin of the laminae. The replicated measurements of the different tracks as well as the three different tracks on the stalagmites Zoo-rez-1 and-2 were cross-dated using the TSAP-Win (R) tree-ring software. This software is very useful for cross-dating because it enables to insert or delete missing or false laminae as well as identifying common pattern by shifting the series back and forth in time. However, visual inspection of the thin sections was necessary to confirm detection of missing or false laminae by TSAP-Win (R). For all three Zoo-rez speleothems, crossdating of the mean lamina thickness series was not possible due to a missing common pattern. The cross-dating procedure results in three refined chronologies for the three Zoo-rez stalagmites of ranging from AD 1821-1970 (Zoo-rez-1), AD 1835-1970 (Zoo-rez-2), and AD 1808-1970 (Zoo-rez-3).}, language = {en} }