@article{BirnickBlaesiusFriedrichetal.2020, author = {Birnick, Johann and Bl{\"a}sius, Thomas and Friedrich, Tobias and Naumann, Felix and Papenbrock, Thorsten and Schirneck, Friedrich Martin}, title = {Hitting set enumeration with partial information for unique column combination discovery}, series = {Proceedings of the VLDB Endowment}, volume = {13}, journal = {Proceedings of the VLDB Endowment}, number = {11}, publisher = {Association for Computing Machinery}, address = {[New York, NY]}, issn = {2150-8097}, doi = {10.14778/3407790.3407824}, pages = {2270 -- 2283}, year = {2020}, abstract = {Unique column combinations (UCCs) are a fundamental concept in relational databases. They identify entities in the data and support various data management activities. Still, UCCs are usually not explicitly defined and need to be discovered. State-of-the-art data profiling algorithms are able to efficiently discover UCCs in moderately sized datasets, but they tend to fail on large and, in particular, on wide datasets due to run time and memory limitations.
In this paper, we introduce HPIValid, a novel UCC discovery algorithm that implements a faster and more resource-saving search strategy. HPIValid models the metadata discovery as a hitting set enumeration problem in hypergraphs. In this way, it combines efficient discovery techniques from data profiling research with the most recent theoretical insights into enumeration algorithms. Our evaluation shows that HPIValid is not only orders of magnitude faster than related work, it also has a much smaller memory footprint.}, language = {en} } @article{MassieRyabovBlasiusetal.2013, author = {Massie, Thomas Michael and Ryabov, Alexei and Blasius, Bernd and Weithoff, Guntram and Gaedke, Ursula}, title = {Complex transient dynamics of stage-structured populations in response to environmental changes}, series = {The American naturalist : a bi-monthly journal devoted to the advancement and correlation of the biological sciences}, volume = {182}, journal = {The American naturalist : a bi-monthly journal devoted to the advancement and correlation of the biological sciences}, number = {1}, publisher = {Univ. of Chicago Press}, address = {Chicago}, issn = {0003-0147}, doi = {10.1086/670590}, pages = {103 -- 119}, year = {2013}, abstract = {Stage structures of populations can have a profound influence on their dynamics. However, not much is known about the transient dynamics that follow a disturbance in such systems. Here we combined chemostat experiments with dynamical modeling to study the response of the phytoplankton species Chlorella vulgaris to press perturbations. From an initially stable steady state, we altered either the concentration or dilution rate of a growth-limiting resource. This disturbance induced a complex transient response-characterized by the possible onset of oscillations-before population numbers relaxed to a new steady state. Thus, cell numbers could initially change in the opposite direction of the long-term change. We present quantitative indexes to characterize the transients and to show that the dynamic response is dependent on the degree of synchronization among life stages, which itself depends on the state of the population before perturbation. That is, we show how identical future steady states can be approached via different transients depending on the initial population structure. Our experimental results are supported by a size-structured model that accounts for interplay between cell-cycle and population-level processes and that includes resource-dependent variability in cell size. Our results should be relevant to other populations with a stage structure including organisms of higher order.}, language = {en} } @article{MassieWeithoffKucklaenderetal.2015, author = {Massie, Thomas Michael and Weithoff, Guntram and Kucklaender, Nina and Gaedke, Ursula and Blasius, Bernd}, title = {Enhanced Moran effect by spatial variation in environmental autocorrelation}, series = {Nature Communications}, volume = {6}, journal = {Nature Communications}, publisher = {Nature Publ. Group}, address = {London}, issn = {2041-1723}, doi = {10.1038/ncomms6993}, pages = {8}, year = {2015}, abstract = {Spatial correlations in environmental stochasticity can synchronize populations over wide areas, a phenomenon known as the Moran effect. The Moran effect has been confirmed in field, laboratory and theoretical investigations. Little is known, however, about the Moran effect in a common ecological case, when environmental variation is temporally autocorrelated and this autocorrelation varies spatially. Here we perform chemostat experiments to investigate the temporal response of independent phytoplankton populations to autocorrelated stochastic forcing. In contrast to naive expectation, two populations without direct coupling can be more strongly correlated than their environmental forcing (enhanced Moran effect), if the stochastic variations differ in their autocorrelation. Our experimental findings are in agreement with numerical simulations and analytical calculations. The enhanced Moran effect is robust to changes in population dynamics, noise spectra and different measures of correlation-suggesting that noise-induced synchrony may play a larger role for population dynamics than previously thought.}, language = {en} } @article{BlaesiusFreibergerFriedrichetal.2022, author = {Bl{\"a}sius, Thomas and Freiberger, Cedric and Friedrich, Tobias and Katzmann, Maximilian and Montenegro-Retana, Felix and Thieffry, Marianne}, title = {Efficient Shortest Paths in Scale-Free Networks with Underlying Hyperbolic Geometry}, series = {ACM Transactions on Algorithms}, volume = {18}, journal = {ACM Transactions on Algorithms}, number = {2}, publisher = {Association for Computing Machinery}, address = {New York}, issn = {1549-6325}, doi = {10.1145/3516483}, pages = {1 -- 32}, year = {2022}, abstract = {A standard approach to accelerating shortest path algorithms on networks is the bidirectional search, which explores the graph from the start and the destination, simultaneously. In practice this strategy performs particularly well on scale-free real-world networks. Such networks typically have a heterogeneous degree distribution (e.g., a power-law distribution) and high clustering (i.e., vertices with a common neighbor are likely to be connected themselves). These two properties can be obtained by assuming an underlying hyperbolic geometry.
To explain the observed behavior of the bidirectional search, we analyze its running time on hyperbolic random graphs and prove that it is (O) over tilde (n(2-1/alpha) + n(1/(2 alpha)) + delta(max)) with high probability, where alpha is an element of (1/2, 1) controls the power-law exponent of the degree distribution, and dmax is the maximum degree. This bound is sublinear, improving the obvious worst-case linear bound. Although our analysis depends on the underlying geometry, the algorithm itself is oblivious to it.}, language = {en} } @article{BlaesiusFriedrichLischeidetal.2022, author = {Bl{\"a}sius, Thomas and Friedrich, Tobias and Lischeid, Julius and Meeks, Kitty and Schirneck, Friedrich Martin}, title = {Efficiently enumerating hitting sets of hypergraphs arising in data profiling}, series = {Journal of computer and system sciences : JCSS}, volume = {124}, journal = {Journal of computer and system sciences : JCSS}, publisher = {Elsevier}, address = {San Diego}, issn = {0022-0000}, doi = {10.1016/j.jcss.2021.10.002}, pages = {192 -- 213}, year = {2022}, abstract = {The transversal hypergraph problem asks to enumerate the minimal hitting sets of a hypergraph. If the solutions have bounded size, Eiter and Gottlob [SICOMP'95] gave an algorithm running in output-polynomial time, but whose space requirement also scales with the output. We improve this to polynomial delay and space. Central to our approach is the extension problem, deciding for a set X of vertices whether it is contained in any minimal hitting set. We show that this is one of the first natural problems to be W[3]-complete. We give an algorithm for the extension problem running in time O(m(vertical bar X vertical bar+1) n) and prove a SETH-lower bound showing that this is close to optimal. We apply our enumeration method to the discovery problem of minimal unique column combinations from data profiling. Our empirical evaluation suggests that the algorithm outperforms its worst-case guarantees on hypergraphs stemming from real-world databases.}, language = {en} } @article{BlaesiusFriedrichKrejcaetal.2022, author = {Bl{\"a}sius, Thomas and Friedrich, Tobias and Krejca, Martin S. and Molitor, Louise}, title = {The impact of geometry on monochrome regions in the flip Schelling process}, series = {Computational geometry}, volume = {108}, journal = {Computational geometry}, publisher = {Elsevier}, address = {Amsterdam}, issn = {0925-7721}, doi = {10.1016/j.comgeo.2022.101902}, year = {2022}, abstract = {Schelling's classical segregation model gives a coherent explanation for the wide-spread phenomenon of residential segregation. We introduce an agent-based saturated open-city variant, the Flip Schelling Process (FSP), in which agents, placed on a graph, have one out of two types and, based on the predominant type in their neighborhood, decide whether to change their types; similar to a new agent arriving as soon as another agent leaves the vertex. We investigate the probability that an edge {u,v} is monochrome, i.e., that both vertices u and v have the same type in the FSP, and we provide a general framework for analyzing the influence of the underlying graph topology on residential segregation. In particular, for two adjacent vertices, we show that a highly decisive common neighborhood, i.e., a common neighborhood where the absolute value of the difference between the number of vertices with different types is high, supports segregation and, moreover, that large common neighborhoods are more decisive. As an application, we study the expected behavior of the FSP on two common random graph models with and without geometry: (1) For random geometric graphs, we show that the existence of an edge {u,v} makes a highly decisive common neighborhood for u and v more likely. Based on this, we prove the existence of a constant c>0 such that the expected fraction of monochrome edges after the FSP is at least 1/2+c. (2) For Erdős-R{\´e}nyi graphs we show that large common neighborhoods are unlikely and that the expected fraction of monochrome edges after the FSP is at most 1/2+o(1). Our results indicate that the cluster structure of the underlying graph has a significant impact on the obtained segregation strength.}, language = {en} }