% Bibliography records, one field per line. Text outside entries is ignored
% by BibTeX, so review notes are kept here rather than inside field values.

@article{MillerSchwarz2011,
  author    = {Miller, Jeff and Schwarz, Wolfgang},
  title     = {Aggregate and individual replication probability within an explicit model of the research process},
  volume    = {16},
  journal   = {Psychological methods},
  number    = {3},
  publisher = {American Psychological Association},
  address   = {Washington},
  issn      = {1082-989X},
  doi       = {10.1037/a0023347},
  pages     = {337--360},
  year      = {2011},
  abstract  = {We study a model of the research process in which the true effect size, the replication jitter due to changes in experimental procedure, and the statistical error of effect size measurement are all normally distributed random variables. Within this model, we analyze the probability of successfully replicating an initial experimental result by obtaining either a statistically significant result in the same direction or any effect in that direction. We analyze both the probability of successfully replicating a particular experimental effect (i.e., the individual replication probability) and the average probability of successful replication across different studies within some research context (i.e., the aggregate replication probability), and we identify the conditions under which the latter can be approximated using the formulas of Killeen (2005a, 2007). We show how both of these probabilities depend on parameters of the research context that would rarely be known in practice. In addition, we show that the statistical uncertainty associated with the size of an initial observed effect would often prevent accurate estimation of the desired individual replication probability even if these research context parameters were known exactly. We conclude that accurate estimates of replication probability are generally unattainable.},
  language  = {en}
}

@article{MarionMcInernyPageletal.2012,
  author    = {Marion, Glenn and McInerny, Greg J. and Pagel, J{\"o}rn and Catterall, Stephen and Cook, Alex R. and Hartig, Florian and O'Hara, Robert B.},
  title     = {Parameter and uncertainty estimation for process-oriented population and distribution models: data, statistics and the niche},
  volume    = {39},
  journal   = {Journal of Biogeography},
  number    = {12},
  publisher = {Wiley-Blackwell},
  address   = {Hoboken},
  issn      = {0305-0270},
  doi       = {10.1111/j.1365-2699.2012.02772.x},
  pages     = {2225--2239},
  year      = {2012},
  abstract  = {The spatial distribution of a species is determined by dynamic processes such as reproduction, mortality and dispersal. Conventional static species distribution models (SDMs) do not incorporate these processes explicitly. This limits their applicability, particularly for non-equilibrium situations such as invasions or climate change. In this paper we show how dynamic SDMs can be formulated and fitted to data within a Bayesian framework. Our focus is on discrete state-space Markov process models which provide a flexible framework to account for stochasticity in key demographic processes, including dispersal, growth and competition. We show how to construct likelihood functions for such models (both discrete and continuous time versions) and how these can be combined with suitable observation models to conduct Bayesian parameter inference using computational techniques such as Markov chain Monte Carlo. We illustrate the current state-of-the-art with three contrasting examples using both simulated and empirical data. The use of simulated data allows the robustness of the methods to be tested with respect to deficiencies in both data and model. These examples show how mechanistic understanding of the processes that determine distribution and abundance can be combined with different sources of information at a range of spatial and temporal scales. Application of such techniques will enable more reliable inference and projections, e.g. under future climate change scenarios than is possible with purely correlative approaches. Conversely, confronting such process-oriented niche models with abundance and distribution data will test current understanding and may ultimately feedback to improve underlying ecological theory.},
  language  = {en}
}

@article{Reich2013,
  author    = {Reich, Sebastian},
  title     = {A nonparametric ensemble transform method for {Bayesian} inference},
  volume    = {35},
  journal   = {SIAM journal on scientific computing},
  number    = {4},
  publisher = {Society for Industrial and Applied Mathematics},
  address   = {Philadelphia},
  issn      = {1064-8275},
  doi       = {10.1137/130907367},
  pages     = {A2013--A2024},
  year      = {2013},
  abstract  = {Many applications, such as intermittent data assimilation, lead to a recursive application of Bayesian inference within a Monte Carlo context. Popular data assimilation algorithms include sequential Monte Carlo methods and ensemble Kalman filters (EnKFs). These methods differ in the way Bayesian inference is implemented. Sequential Monte Carlo methods rely on importance sampling combined with a resampling step, while EnKFs utilize a linear transformation of Monte Carlo samples based on the classic Kalman filter. While EnKFs have proven to be quite robust even for small ensemble sizes, they are not consistent since their derivation relies on a linear regression ansatz. In this paper, we propose another transform method, which does not rely on any a priori assumptions on the underlying prior and posterior distributions. The new method is based on solving an optimal transportation problem for discrete random variables.},
  language  = {en}
}

@article{SchuettHarmelingMackeetal.2016,
  author    = {Sch{\"u}tt, Heiko Herbert and Harmeling, Stefan and Macke, Jakob H. and Wichmann, Felix A.},
  title     = {Painfree and accurate {Bayesian} estimation of psychometric functions for (potentially) overdispersed data},
  volume    = {122},
  journal   = {Vision research : an international journal for functional aspects of vision},
  publisher = {Elsevier},
  address   = {Oxford},
  issn      = {0042-6989},
  doi       = {10.1016/j.visres.2016.02.002},
  pages     = {105--123},
  year      = {2016},
  abstract  = {The psychometric function describes how an experimental variable, such as stimulus strength, influences the behaviour of an observer. Estimation of psychometric functions from experimental data plays a central role in fields such as psychophysics, experimental psychology and in the behavioural neurosciences. Experimental data may exhibit substantial overdispersion, which may result from non-stationarity in the behaviour of observers. Here we extend the standard binomial model which is typically used for psychometric function estimation to a beta-binomial model. We show that the use of the beta-binomial model makes it possible to determine accurate credible intervals even in data which exhibit substantial overdispersion. This goes beyond classical measures for overdispersion goodness-of-fit which can detect overdispersion but provide no method to do correct inference for overdispersed data. We use Bayesian inference methods for estimating the posterior distribution of the parameters of the psychometric function. Unlike previous Bayesian psychometric inference methods our software implementation-psignifit 4 performs numerical integration of the posterior within automatically determined bounds. This avoids the use of Markov chain Monte Carlo (MCMC) methods typically requiring expert knowledge. Extensive numerical tests show the validity of the approach and we discuss implications of overdispersion for experimental design. A comprehensive MATLAB toolbox implementing the method is freely available; a python implementation providing the basic capabilities is also available. (C) 2016 The Authors. Published by Elsevier Ltd.},
  language  = {en}
}

@article{GianniotisSchnoerrMolkenthinetal.2016,
  author    = {Gianniotis, Nikolaos and Schnoerr, Christoph and Molkenthin, Christian and Bora, Sanjay Singh},
  title     = {Approximate variational inference based on a finite sample of {Gaussian} latent variables},
  volume    = {19},
  journal   = {Pattern Analysis \& Applications},
  publisher = {Springer},
  address   = {New York},
  issn      = {1433-7541},
  doi       = {10.1007/s10044-015-0496-9},
  pages     = {475--485},
  year      = {2016},
  abstract  = {Variational methods are employed in situations where exact Bayesian inference becomes intractable due to the difficulty in performing certain integrals. Typically, variational methods postulate a tractable posterior and formulate a lower bound on the desired integral to be approximated, e.g. marginal likelihood. The lower bound is then optimised with respect to its free parameters, the so-called variational parameters. However, this is not always possible as for certain integrals it is very challenging (or tedious) to come up with a suitable lower bound. Here, we propose a simple scheme that overcomes some of the awkward cases where the usual variational treatment becomes difficult. The scheme relies on a rewriting of the lower bound on the model log-likelihood. We demonstrate the proposed scheme on a number of synthetic and real examples, as well as on a real geophysical model for which the standard variational approaches are inapplicable.},
  language  = {en}
}

@article{AcevedoDeWiljesReich2017,
  author    = {Acevedo, Walter and De Wiljes, Jana and Reich, Sebastian},
  title     = {Second-order accurate ensemble transform particle filters},
  volume    = {39},
  journal   = {SIAM journal on scientific computing},
  number    = {5},
  publisher = {Society for Industrial and Applied Mathematics},
  address   = {Philadelphia},
  issn      = {1064-8275},
  doi       = {10.1137/16M1095184},
  pages     = {A1834--A1850},
  year      = {2017},
  abstract  = {Particle filters (also called sequential Monte Carlo methods) are widely used for state and parameter estimation problems in the context of nonlinear evolution equations. The recently proposed ensemble transform particle filter (ETPF) [S. Reich, SIAM T. Sci. Comput., 35, (2013), pp. A2013-A2014] replaces the resampling step of a standard particle filter by a linear transformation which allows for a hybridization of particle filters with ensemble Kalman filters and renders the resulting hybrid filters applicable to spatially extended systems. However, the linear transformation step is computationally expensive and leads to an underestimation of the ensemble spread for small and moderate ensemble sizes. Here we address both of these shortcomings by developing second order accurate extensions of the ETPF. These extensions allow one in particular to replace the exact solution of a linear transport problem by its Sinkhorn approximation. It is also demonstrated that the nonlinear ensemble transform filter arises as a special case of our general framework. We illustrate the performance of the second-order accurate filters for the chaotic Lorenz-63 and Lorenz-96 models and a dynamic scene-viewing model. The numerical results for the Lorenz-63 and Lorenz-96 models demonstrate that significant accuracy improvements can be achieved in comparison to a standard ensemble Kalman filter and the ETPF for small to moderate ensemble sizes. The numerical results for the scene-viewing model reveal, on the other hand, that second-order corrections can lead to statistically inconsistent samples from the posterior parameter distribution.},
  language  = {en}
}

% NOTE(review): the pages field of the next entry holds a single number and
% no issue number is given; this looks like a page count rather than a page
% range or article number. Confirm against the publisher record.
@article{RosenbaumRaatzWeithoffetal.2019,
  author    = {Rosenbaum, Benjamin and Raatz, Michael and Weithoff, Guntram and Fussmann, Gregor F. and Gaedke, Ursula},
  title     = {Estimating parameters from multiple time series of population dynamics using {Bayesian} inference},
  volume    = {6},
  journal   = {Frontiers in ecology and evolution},
  publisher = {Frontiers Research Foundation},
  address   = {Lausanne},
  issn      = {2296-701X},
  doi       = {10.3389/fevo.2018.00234},
  pages     = {14},
  year      = {2019},
  abstract  = {Empirical time series of interacting entities, e.g., species abundances, are highly useful to study ecological mechanisms. Mathematical models are valuable tools to further elucidate those mechanisms and underlying processes. However, obtaining an agreement between model predictions and experimental observations remains a demanding task. As models always abstract from reality one parameter often summarizes several properties. Parameter measurements are performed in additional experiments independent of the ones delivering the time series. Transferring these parameter values to different settings may result in incorrect parametrizations. On top of that, the properties of organisms and thus the respective parameter values may vary considerably. These issues limit the use of a priori model parametrizations. In this study, we present a method suited for a direct estimation of model parameters and their variability from experimental time series data. We combine numerical simulations of a continuous-time dynamical population model with Bayesian inference, using a hierarchical framework that allows for variability of individual parameters. The method is applied to a comprehensive set of time series from a laboratory predator-prey system that features both steady states and cyclic population dynamics. Our model predictions are able to reproduce both steady states and cyclic dynamics of the data. Additionally to the direct estimates of the parameter values, the Bayesian approach also provides their uncertainties. We found that fitting cyclic population dynamics, which contain more information on the process rates than steady states, yields more precise parameter estimates. We detected significant variability among parameters of different time series and identified the variation in the maximum growth rate of the prey as a source for the transition from steady states to cyclic dynamics. By lending more flexibility to the model, our approach facilitates parametrizations and shows more easily which patterns in time series can be explained also by simple models. Applying Bayesian inference and dynamical population models in conjunction may help to quantify the profound variability in organismal properties in nature.},
  language  = {en}
}

@article{SavoyHesse2019,
  author    = {Savoy, Heather and He{\ss}e, Falk},
  title     = {Dimension reduction for integrating data series in {Bayesian} inversion of geostatistical models},
  volume    = {33},
  journal   = {Stochastic environmental research and risk assessment},
  number    = {7},
  publisher = {Springer},
  address   = {New York},
  issn      = {1436-3240},
  doi       = {10.1007/s00477-019-01697-9},
  pages     = {1327--1344},
  year      = {2019},
  abstract  = {This study explores methods with which multidimensional data, e.g. time series, can be effectively incorporated into a Bayesian framework for inferring geostatistical parameters. Such series are difficult to use directly in the likelihood estimation procedure due to their high dimensionality; thus, a dimension reduction approach is taken to utilize these measurements in the inference. Two synthetic scenarios from hydrology are explored in which pumping drawdown and concentration breakthrough curves are used to infer the global mean of a log-normally distributed hydraulic conductivity field. Both cases pursue the use of a parametric model to represent the shape of the observed time series with physically-interpretable parameters (e.g. the time and magnitude of a concentration peak), which is compared to subsets of the observations with similar dimensionality. The results from both scenarios highlight the effectiveness for the shape-matching models to reduce dimensionality from 100+ dimensions down to less than five. The models outperform the alternative subset method, especially when the observations are noisy. This approach to incorporating time series observations in the Bayesian framework for inferring geostatistical parameters allows for high-dimensional observations to be faithfully represented in lower-dimensional space for the non-parametric likelihood estimation procedure, which increases the applicability of the framework to more observation types. Although the scenarios are both from hydrogeology, the methodology is general in that no assumptions are made about the subject domain. Any application that requires the inference of geostatistical parameters using series in either time or space can use the approach described in this paper.},
  language  = {en}
}

@article{GarbunoInigoNueskenReich2020,
  author    = {Garbuno-Inigo, Alfredo and N{\"u}sken, Nikolas and Reich, Sebastian},
  title     = {Affine invariant interacting {Langevin} dynamics for {Bayesian} inference},
  volume    = {19},
  journal   = {SIAM journal on applied dynamical systems},
  number    = {3},
  publisher = {Society for Industrial and Applied Mathematics},
  address   = {Philadelphia},
  issn      = {1536-0040},
  doi       = {10.1137/19M1304891},
  pages     = {1633--1658},
  year      = {2020},
  abstract  = {We propose a computational method (with acronym ALDI) for sampling from a given target distribution based on first-order (overdamped) Langevin dynamics which satisfies the property of affine invariance. The central idea of ALDI is to run an ensemble of particles with their empirical covariance serving as a preconditioner for their underlying Langevin dynamics. ALDI does not require taking the inverse or square root of the empirical covariance matrix, which enables application to high-dimensional sampling problems. The theoretical properties of ALDI are studied in terms of nondegeneracy and ergodicity. Furthermore, we study its connections to diffusion on Riemannian manifolds and Wasserstein gradient flows. Bayesian inference serves as a main application area for ALDI. In case of a forward problem with additive Gaussian measurement errors, ALDI allows for a gradient-free approximation in the spirit of the ensemble Kalman filter. A computational comparison between gradient-free and gradient-based ALDI is provided for a PDE constrained Bayesian inverse problem.},
  language  = {en}
}

% NOTE(review): phdthesis entries require a school field, which this record
% does not carry; the granting institution is not stated anywhere in the
% record -- TODO add it once confirmed. The pages value looks like a page
% count (front matter, body) rather than a range -- confirm.
@phdthesis{Thapa2020,
  author    = {Thapa, Samudrajit},
  title     = {Deciphering anomalous diffusion in complex systems using {Bayesian} inference and large deviation theory},
  pages     = {xx, 186},
  year      = {2020},
  abstract  = {The development of methods such as super-resolution microscopy (Nobel prize in Chemistry, 2014) and multi-scale computer modelling (Nobel prize in Chemistry, 2013) have provided scientists with powerful tools to study microscopic systems. Sub-micron particles or even fluorescently labelled single molecules can now be tracked for long times in a variety of systems such as living cells, biological membranes, colloidal solutions etc. at spatial and temporal resolutions previously inaccessible. Parallel to such single-particle tracking experiments, super-computing techniques enable simulations of large atomistic or coarse-grained systems such as biologically relevant membranes or proteins from picoseconds to seconds, generating large volume of data. These have led to an unprecedented rise in the number of reported cases of anomalous diffusion wherein the characteristic features of Brownian motion---namely linear growth of the mean squared displacement with time and the Gaussian form of the probability density function (PDF) to find a particle at a given position at some fixed time---are routinely violated. This presents a big challenge in identifying the underlying stochastic process and also estimating the corresponding parameters of the process to completely describe the observed behaviour. Finding the correct physical mechanism which leads to the observed dynamics is of paramount importance, for example, to understand the first-arrival time of transcription factors which govern gene regulation, or the survival probability of a pathogen in a biological cell post drug administration. Statistical Physics provides useful methods that can be applied to extract such vital information. This cumulative dissertation, based on five publications, focuses on the development, implementation and application of such tools with special emphasis on Bayesian inference and large deviation theory. Together with the implementation of Bayesian model comparison and parameter estimation methods for models of diffusion, complementary tools are developed based on different observables and large deviation theory to classify stochastic processes and gather pivotal information. Bayesian analysis of the data of micron-sized particles traced in mucin hydrogels at different pH conditions unveiled several interesting features and we gained insights into, for example, how in going from basic to acidic pH, the hydrogel becomes more heterogeneous and phase separation can set in, leading to observed non-ergodicity (non-equivalence of time and ensemble averages) and non-Gaussian PDF. With large deviation theory based analysis we could detect, for instance, non-Gaussianity in seeming Brownian diffusion of beads in aqueous solution, anisotropic motion of the beads in mucin at neutral pH conditions, and short-time correlations in climate data. Thus through the application of the developed methods to biological and meteorological datasets crucial information is garnered about the underlying stochastic processes and significant insights are obtained in understanding the physical nature of these systems.},
  language  = {en}
}