@article{DoerrKrejca2020, author = {Doerr, Benjamin and Krejca, Martin S.}, title = {Significance-based estimation-of-distribution algorithms}, series = {IEEE transactions on evolutionary computation}, volume = {24}, journal = {IEEE transactions on evolutionary computation}, number = {6}, publisher = {Institute of Electrical and Electronics Engineers}, address = {New York, NY}, issn = {1089-778X}, doi = {10.1109/TEVC.2019.2956633}, pages = {1025 -- 1034}, year = {2020}, abstract = {Estimation-of-distribution algorithms (EDAs) are randomized search heuristics that create a probabilistic model of the solution space, which is updated iteratively, based on the quality of the solutions sampled according to the model. As previous works show, this iteration-based perspective can lead to erratic updates of the model, in particular, to bit-frequencies approaching a random boundary value. In order to overcome this problem, we propose a new EDA based on the classic compact genetic algorithm (cGA) that takes into account a longer history of samples and updates its model only with respect to information which it classifies as statistically significant. We prove that this significance-based cGA (sig-cGA) optimizes the commonly regarded benchmark functions OneMax (OM), LeadingOnes, and BinVal all in quasilinear time, a result shown for no other EDA or evolutionary algorithm so far. For the recently proposed stable compact genetic algorithm-an EDA that tries to prevent erratic model updates by imposing a bias to the uniformly distributed model-we prove that it optimizes OM only in a time exponential in its hypothetical population size. Similarly, we show that the convex search algorithm cannot optimize OM in polynomial time.}, language = {en} } @article{DoerrKrejca2021, author = {Doerr, Benjamin and Krejca, Martin Stefan}, title = {A simplified run time analysis of the univariate marginal distribution algorithm on LeadingOnes}, series = {Theoretical computer science}, volume = {851}, journal = {Theoretical computer science}, publisher = {Elsevier}, address = {Amsterdam}, issn = {0304-3975}, doi = {10.1016/j.tcs.2020.11.028}, pages = {121 -- 128}, year = {2021}, abstract = {With elementary means, we prove a stronger run time guarantee for the univariate marginal distribution algorithm (UMDA) optimizing the LEADINGONES benchmark function in the desirable regime with low genetic drift. If the population size is at least quasilinear, then, with high probability, the UMDA samples the optimum in a number of iterations that is linear in the problem size divided by the logarithm of the UMDA's selection rate. This improves over the previous guarantee, obtained by Dang and Lehre (2015) via the deep level-based population method, both in terms of the run time and by demonstrating further run time gains from small selection rates. Under similar assumptions, we prove a lower bound that matches our upper bound up to constant factors.}, language = {en} } @article{DoerrKoetzing2022, author = {Doerr, Benjamin and K{\"o}tzing, Timo}, title = {Lower bounds from fitness levels made easy}, series = {Algorithmica}, journal = {Algorithmica}, publisher = {Springer}, address = {New York}, issn = {0178-4617}, doi = {10.1007/s00453-022-00952-w}, pages = {29}, year = {2022}, abstract = {One of the first and easy to use techniques for proving run time bounds for evolutionary algorithms is the so-called method of fitness levels by Wegener. It uses a partition of the search space into a sequence of levels which are traversed by the algorithm in increasing order, possibly skipping levels. An easy, but often strong upper bound for the run time can then be derived by adding the reciprocals of the probabilities to leave the levels (or upper bounds for these). Unfortunately, a similarly effective method for proving lower bounds has not yet been established. The strongest such method, proposed by Sudholt (2013), requires a careful choice of the viscosity parameters gamma(i), j, 0 <= i < j <= n. In this paper we present two new variants of the method, one for upper and one for lower bounds. Besides the level leaving probabilities, they only rely on the probabilities that levels are visited at all. We show that these can be computed or estimated without greater difficulties and apply our method to reprove the following known results in an easy and natural way. (i) The precise run time of the (1+1) EA on LEADINGONES. (ii) A lower bound for the run time of the (1+1) EA on ONEMAX, tight apart from an O(n) term. (iii) A lower bound for the run time of the (1+1) EA on long k-paths (which differs slightly from the previous result due to a small error in the latter). We also prove a tighter lower bound for the run time of the (1+1) EA on jump functions by showing that, regardless of the jump size, only with probability O(2(-n)) the algorithm can avoid to jump over the valley of low fitness.}, language = {en} } @article{DoerrKoetzing2020, author = {Doerr, Benjamin and K{\"o}tzing, Timo}, title = {Multiplicative Up-Drift}, series = {Algorithmica}, volume = {83}, journal = {Algorithmica}, number = {10}, publisher = {Springer}, address = {New York}, issn = {0178-4617}, doi = {10.1007/s00453-020-00775-7}, pages = {3017 -- 3058}, year = {2020}, abstract = {Drift analysis aims at translating the expected progress of an evolutionary algorithm (or more generally, a random process) into a probabilistic guarantee on its run time (hitting time). So far, drift arguments have been successfully employed in the rigorous analysis of evolutionary algorithms, however, only for the situation that the progress is constant or becomes weaker when approaching the target. Motivated by questions like how fast fit individuals take over a population, we analyze random processes exhibiting a (1+delta)-multiplicative growth in expectation. We prove a drift theorem translating this expected progress into a hitting time. This drift theorem gives a simple and insightful proof of the level-based theorem first proposed by Lehre (2011). Our version of this theorem has, for the first time, the best-possible near-linear dependence on 1/delta} (the previous results had an at least near-quadratic dependence), and it only requires a population size near-linear in delta (this was super-quadratic in previous results). These improvements immediately lead to stronger run time guarantees for a number of applications. We also discuss the case of large delta and show stronger results for this setting.}, language = {en} } @article{DoerrKoetzingLagodzinskietal.2020, author = {Doerr, Benjamin and K{\"o}tzing, Timo and Lagodzinski, Gregor J. A. and Lengler, Johannes}, title = {The impact of lexicographic parsimony pressure for ORDER/MAJORITY on the run time}, series = {Theoretical computer science : the journal of the EATCS}, volume = {816}, journal = {Theoretical computer science : the journal of the EATCS}, publisher = {Elsevier}, address = {Amsterdam [u.a.]}, issn = {0304-3975}, doi = {10.1016/j.tcs.2020.01.011}, pages = {144 -- 168}, year = {2020}, abstract = {While many optimization problems work with a fixed number of decision variables and thus a fixed-length representation of possible solutions, genetic programming (GP) works on variable-length representations. A naturally occurring problem is that of bloat, that is, the unnecessary growth of solution lengths, which may slow down the optimization process. So far, the mathematical runtime analysis could not deal well with bloat and required explicit assumptions limiting bloat. In this paper, we provide the first mathematical runtime analysis of a GP algorithm that does not require any assumptions on the bloat. Previous performance guarantees were only proven conditionally for runs in which no strong bloat occurs. Together with improved analyses for the case with bloat restrictions our results show that such assumptions on the bloat are not necessary and that the algorithm is efficient without explicit bloat control mechanism. More specifically, we analyzed the performance of the (1 + 1) GP on the two benchmark functions ORDER and MAJORITY. When using lexicographic parsimony pressure as bloat control, we show a tight runtime estimate of O(T-init + nlogn) iterations both for ORDER and MAJORITY. For the case without bloat control, the bounds O(T-init logT(i)(nit) + n(logn)(3)) and Omega(T-init + nlogn) (and Omega(T-init log T-init) for n = 1) hold for MAJORITY(1).}, language = {en} } @article{DoerrNeumannSutton2016, author = {Doerr, Benjamin and Neumann, Frank and Sutton, Andrew M.}, title = {Time Complexity Analysis of Evolutionary Algorithms on Random Satisfiable k-CNF Formulas}, series = {Algorithmica : an international journal in computer science}, volume = {78}, journal = {Algorithmica : an international journal in computer science}, publisher = {Springer}, address = {New York}, issn = {0178-4617}, doi = {10.1007/s00453-016-0190-3}, pages = {561 -- 586}, year = {2016}, abstract = {We contribute to the theoretical understanding of randomized search heuristics by investigating their optimization behavior on satisfiable random k-satisfiability instances both in the planted solution model and the uniform model conditional on satisfiability. Denoting the number of variables by n, our main technical result is that the simple () evolutionary algorithm with high probability finds a satisfying assignment in time when the clause-variable density is at least logarithmic. For low density instances, evolutionary algorithms seem to be less effective, and all we can show is a subexponential upper bound on the runtime for densities below . We complement these mathematical results with numerical experiments on a broader density spectrum. They indicate that, indeed, the () EA is less efficient on lower densities. Our experiments also suggest that the implicit constants hidden in our main runtime guarantee are low. Our main result extends and considerably improves the result obtained by Sutton and Neumann (Lect Notes Comput Sci 8672:942-951, 2014) in terms of runtime, minimum density, and clause length. These improvements are made possible by establishing a close fitness-distance correlation in certain parts of the search space. This approach might be of independent interest and could be useful for other average-case analyses of randomized search heuristics. While the notion of a fitness-distance correlation has been around for a long time, to the best of our knowledge, this is the first time that fitness-distance correlation is explicitly used to rigorously prove a performance statement for an evolutionary algorithm.}, language = {en} }