@article{BlaesiusFriedrichKatzmannetal.2022, author = {Bl{\"a}sius, Thomas and Friedrich, Tobias and Katzmann, Maximilian and Meyer, Ulrich and Penschuck, Manuel and Weyand, Christopher}, title = {Efficiently generating geometric inhomogeneous and hyperbolic random graphs}, series = {Network Science}, volume = {10}, journal = {Network Science}, number = {4}, publisher = {Cambridge Univ. Press}, address = {New York}, issn = {2050-1242}, doi = {10.1017/nws.2022.32}, pages = {361 -- 380}, year = {2022}, abstract = {Hyperbolic random graphs (HRGs) and geometric inhomogeneous random graphs (GIRGs) are two similar generative network models that were designed to resemble complex real-world networks. In particular, they have a power-law degree distribution with controllable exponent beta and high clustering that can be controlled via the temperature T. We present the first implementation of an efficient GIRG generator running in expected linear time. Besides varying temperatures, it also supports underlying geometries of higher dimensions. It is capable of generating graphs with ten million edges in under a second on commodity hardware. The algorithm can be adapted to HRGs. Our resulting implementation is the fastest sequential HRG generator, despite the fact that we support non-zero temperatures. Though non-zero temperatures are crucial for many applications, most existing generators are restricted to T = 0 . We also support parallelization, although this is not the focus of this paper. Moreover, we note that our generators draw from the correct probability distribution, that is, they involve no approximation. Besides the generators themselves, we also provide an efficient algorithm to determine the non-trivial dependency between the average degree of the resulting graph and the input parameters of the GIRG model. This makes it possible to specify the desired expected average degree as input. Moreover, we investigate the differences between HRGs and GIRGs, shedding new light on the nature of the relation between the two models. Although HRGs represent, in a certain sense, a special case of the GIRG model, we find that a straightforward inclusion does not hold in practice. However, the difference is negligible for most use cases.}, language = {en} } @article{HackerNaumannFriedrichetal.2022, author = {Hacker, Philipp and Naumann, Felix and Friedrich, Tobias and Grundmann, Stefan and Lehmann, Anja and Zech, Herbert}, title = {AI compliance - challenges of bridging data science and law}, series = {Journal of Data and Information Quality (JDIQ)}, volume = {14}, journal = {Journal of Data and Information Quality (JDIQ)}, number = {3}, publisher = {Association for Computing Machinery}, address = {New York}, issn = {1936-1955}, doi = {10.1145/3531532}, pages = {4}, year = {2022}, abstract = {This vision article outlines the main building blocks of what we term AI Compliance, an effort to bridge two complementary research areas: computer science and the law. Such research has the goal to model, measure, and affect the quality of AI artifacts, such as data, models, and applications, to then facilitate adherence to legal standards.}, language = {en} } @article{BlaesiusFriedrichKrejcaetal.2022, author = {Bl{\"a}sius, Thomas and Friedrich, Tobias and Krejca, Martin S. and Molitor, Louise}, title = {The impact of geometry on monochrome regions in the flip Schelling process}, series = {Computational geometry}, volume = {108}, journal = {Computational geometry}, publisher = {Elsevier}, address = {Amsterdam}, issn = {0925-7721}, doi = {10.1016/j.comgeo.2022.101902}, year = {2022}, abstract = {Schelling's classical segregation model gives a coherent explanation for the wide-spread phenomenon of residential segregation. We introduce an agent-based saturated open-city variant, the Flip Schelling Process (FSP), in which agents, placed on a graph, have one out of two types and, based on the predominant type in their neighborhood, decide whether to change their types; similar to a new agent arriving as soon as another agent leaves the vertex. We investigate the probability that an edge {u,v} is monochrome, i.e., that both vertices u and v have the same type in the FSP, and we provide a general framework for analyzing the influence of the underlying graph topology on residential segregation. In particular, for two adjacent vertices, we show that a highly decisive common neighborhood, i.e., a common neighborhood where the absolute value of the difference between the number of vertices with different types is high, supports segregation and, moreover, that large common neighborhoods are more decisive. As an application, we study the expected behavior of the FSP on two common random graph models with and without geometry: (1) For random geometric graphs, we show that the existence of an edge {u,v} makes a highly decisive common neighborhood for u and v more likely. Based on this, we prove the existence of a constant c>0 such that the expected fraction of monochrome edges after the FSP is at least 1/2+c. (2) For Erdős-R{\´e}nyi graphs we show that large common neighborhoods are unlikely and that the expected fraction of monochrome edges after the FSP is at most 1/2+o(1). Our results indicate that the cluster structure of the underlying graph has a significant impact on the obtained segregation strength.}, language = {en} } @article{CohenHershcovitchTarazetal.2023, author = {Cohen, Sarel and Hershcovitch, Moshik and Taraz, Martin and Kissig, Otto and Issac, Davis and Wood, Andrew and Waddington, Daniel and Chin, Peter and Friedrich, Tobias}, title = {Improved and optimized drug repurposing for the SARS-CoV-2 pandemic}, series = {PLoS one}, volume = {18}, journal = {PLoS one}, number = {3}, publisher = {PLoS}, address = {San Fransisco}, issn = {1932-6203}, doi = {10.1371/journal.pone.0266572}, pages = {13}, year = {2023}, abstract = {The active global SARS-CoV-2 pandemic caused more than 426 million cases and 5.8 million deaths worldwide. The development of completely new drugs for such a novel disease is a challenging, time intensive process. Despite researchers around the world working on this task, no effective treatments have been developed yet. This emphasizes the importance of drug repurposing, where treatments are found among existing drugs that are meant for different diseases. A common approach to this is based on knowledge graphs, that condense relationships between entities like drugs, diseases and genes. Graph neural networks (GNNs) can then be used for the task at hand by predicting links in such knowledge graphs. Expanding on state-of-the-art GNN research, Doshi et al. recently developed the Dr-COVID model. We further extend their work using additional output interpretation strategies. The best aggregation strategy derives a top-100 ranking of 8,070 candidate drugs, 32 of which are currently being tested in COVID-19-related clinical trials. Moreover, we present an alternative application for the model, the generation of additional candidates based on a given pre-selection of drug candidates using collaborative filtering. In addition, we improved the implementation of the Dr-COVID model by significantly shortening the inference and pre-processing time by exploiting data-parallelism. As drug repurposing is a task that requires high computation and memory resources, we further accelerate the post-processing phase using a new emerging hardware-we propose a new approach to leverage the use of high-capacity Non-Volatile Memory for aggregate drug ranking.}, language = {en} } @article{BlaesiusFriedrichLischeidetal.2022, author = {Bl{\"a}sius, Thomas and Friedrich, Tobias and Lischeid, Julius and Meeks, Kitty and Schirneck, Friedrich Martin}, title = {Efficiently enumerating hitting sets of hypergraphs arising in data profiling}, series = {Journal of computer and system sciences : JCSS}, volume = {124}, journal = {Journal of computer and system sciences : JCSS}, publisher = {Elsevier}, address = {San Diego}, issn = {0022-0000}, doi = {10.1016/j.jcss.2021.10.002}, pages = {192 -- 213}, year = {2022}, abstract = {The transversal hypergraph problem asks to enumerate the minimal hitting sets of a hypergraph. If the solutions have bounded size, Eiter and Gottlob [SICOMP'95] gave an algorithm running in output-polynomial time, but whose space requirement also scales with the output. We improve this to polynomial delay and space. Central to our approach is the extension problem, deciding for a set X of vertices whether it is contained in any minimal hitting set. We show that this is one of the first natural problems to be W[3]-complete. We give an algorithm for the extension problem running in time O(m(vertical bar X vertical bar+1) n) and prove a SETH-lower bound showing that this is close to optimal. We apply our enumeration method to the discovery problem of minimal unique column combinations from data profiling. Our empirical evaluation suggests that the algorithm outperforms its worst-case guarantees on hypergraphs stemming from real-world databases.}, language = {en} } @article{ShiSchirneckFriedrichetal.2018, author = {Shi, Feng and Schirneck, Friedrich Martin and Friedrich, Tobias and K{\"o}tzing, Timo and Neumann, Frank}, title = {Correction to: Reoptimization time analysis of evolutionary algorithms on linear functions under dynamic uniform constraints}, series = {Algorithmica : an international journal in computer science}, volume = {82}, journal = {Algorithmica : an international journal in computer science}, number = {10}, publisher = {Springer}, address = {New York}, issn = {0178-4617}, doi = {10.1007/s00453-020-00739-x}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-605295}, pages = {3117 -- 3123}, year = {2018}, language = {en} } @article{BlaesiusFreibergerFriedrichetal.2022, author = {Bl{\"a}sius, Thomas and Freiberger, Cedric and Friedrich, Tobias and Katzmann, Maximilian and Montenegro-Retana, Felix and Thieffry, Marianne}, title = {Efficient Shortest Paths in Scale-Free Networks with Underlying Hyperbolic Geometry}, series = {ACM Transactions on Algorithms}, volume = {18}, journal = {ACM Transactions on Algorithms}, number = {2}, publisher = {Association for Computing Machinery}, address = {New York}, issn = {1549-6325}, doi = {10.1145/3516483}, pages = {1 -- 32}, year = {2022}, abstract = {A standard approach to accelerating shortest path algorithms on networks is the bidirectional search, which explores the graph from the start and the destination, simultaneously. In practice this strategy performs particularly well on scale-free real-world networks. Such networks typically have a heterogeneous degree distribution (e.g., a power-law distribution) and high clustering (i.e., vertices with a common neighbor are likely to be connected themselves). These two properties can be obtained by assuming an underlying hyperbolic geometry.
To explain the observed behavior of the bidirectional search, we analyze its running time on hyperbolic random graphs and prove that it is (O) over tilde (n(2-1/alpha) + n(1/(2 alpha)) + delta(max)) with high probability, where alpha is an element of (1/2, 1) controls the power-law exponent of the degree distribution, and dmax is the maximum degree. This bound is sublinear, improving the obvious worst-case linear bound. Although our analysis depends on the underlying geometry, the algorithm itself is oblivious to it.}, language = {en} } @article{CaselFischbeckFriedrichetal.2022, author = {Casel, Katrin and Fischbeck, Philipp and Friedrich, Tobias and G{\"o}bel, Andreas and Lagodzinski, Julius Albert Gregor}, title = {Zeros and approximations of Holant polynomials on the complex plane}, series = {Computational complexity : CC}, volume = {31}, journal = {Computational complexity : CC}, number = {2}, publisher = {Springer}, address = {Basel}, issn = {1016-3328}, doi = {10.1007/s00037-022-00226-5}, pages = {52}, year = {2022}, abstract = {We present fully polynomial time approximation schemes for a broad class of Holant problems with complex edge weights, which we call Holant polynomials. We transform these problems into partition functions of abstract combinatorial structures known as polymers in statistical physics. Our method involves establishing zero-free regions for the partition functions of polymer models and using the most significant terms of the cluster expansion to approximate them. Results of our technique include new approximation and sampling algorithms for a diverse class of Holant polynomials in the low-temperature regime (i.e. small external field) and approximation algorithms for general Holant problems with small signature weights. Additionally, we give randomised approximation and sampling algorithms with faster running times for more restrictive classes. Finally, we improve the known zero-free regions for a perfect matching polynomial.}, language = {en} } @article{BlaesiusFriedrichSchirneck2021, author = {Blaesius, Thomas and Friedrich, Tobias and Schirneck, Friedrich Martin}, title = {The complexity of dependency detection and discovery in relational databases}, series = {Theoretical computer science}, volume = {900}, journal = {Theoretical computer science}, publisher = {Elsevier}, address = {Amsterdam}, issn = {0304-3975}, doi = {10.1016/j.tcs.2021.11.020}, pages = {79 -- 96}, year = {2021}, abstract = {Multi-column dependencies in relational databases come associated with two different computational tasks. The detection problem is to decide whether a dependency of a certain type and size holds in a given database, the discovery problem asks to enumerate all valid dependencies of that type. We settle the complexity of both of these problems for unique column combinations (UCCs), functional dependencies (FDs), and inclusion dependencies (INDs). We show that the detection of UCCs and FDs is W[2]-complete when parameterized by the solution size. The discovery of inclusion-wise minimal UCCs is proven to be equivalent under parsimonious reductions to the transversal hypergraph problem of enumerating the minimal hitting sets of a hypergraph. The discovery of FDs is equivalent to the simultaneous enumeration of the hitting sets of multiple input hypergraphs. We further identify the detection of INDs as one of the first natural W[3]-complete problems. The discovery of maximal INDs is shown to be equivalent to enumerating the maximal satisfying assignments of antimonotone, 3-normalized Boolean formulas.}, language = {en} } @article{RoostapourNeumannNeumannetal.2022, author = {Roostapour, Vahid and Neumann, Aneta and Neumann, Frank and Friedrich, Tobias}, title = {Pareto optimization for subset selection with dynamic cost constraints}, series = {Artificial intelligence}, volume = {302}, journal = {Artificial intelligence}, publisher = {Elsevier}, address = {Amsterdam}, issn = {0004-3702}, doi = {10.1016/j.artint.2021.103597}, pages = {17}, year = {2022}, abstract = {We consider the subset selection problem for function f with constraint bound B that changes over time. Within the area of submodular optimization, various greedy approaches are commonly used. For dynamic environments we observe that the adaptive variants of these greedy approaches are not able to maintain their approximation quality. Investigating the recently introduced POMC Pareto optimization approach, we show that this algorithm efficiently computes a phi=(alpha(f)/2)(1 - 1/e(alpha)f)-approximation, where alpha(f) is the submodularity ratio of f, for each possible constraint bound b <= B. Furthermore, we show that POMC is able to adapt its set of solutions quickly in the case that B increases. Our experimental investigations for the influence maximization in social networks show the advantage of POMC over generalized greedy algorithms. We also consider EAMC, a new evolutionary algorithm with polynomial expected time guarantee to maintain phi approximation ratio, and NSGA-II with two different population sizes as advanced multi-objective optimization algorithm, to demonstrate their challenges in optimizing the maximum coverage problem. Our empirical analysis shows that, within the same number of evaluations, POMC is able to perform as good as NSGA-II under linear constraint, while EAMC performs significantly worse than all considered algorithms in most cases.}, language = {en} }