@article{ThapaParkKimetal.2022,
  author    = {Thapa, Samudrajit and Park, Seongyu and Kim, Yeongjin and Jeon, Jae-Hyung and Metzler, Ralf and Lomholt, Michael A.},
  title     = {{Bayesian} inference of scaled versus fractional {Brownian} motion},
  journal   = {Journal of Physics A: Mathematical and Theoretical},
  volume    = {55},
  number    = {19},
  pages     = {21},
  year      = {2022},
  publisher = {IOP Publ. Ltd.},
  address   = {Bristol},
  issn      = {1751-8113},
  doi       = {10.1088/1751-8121/ac60e7},
  abstract  = {We present a Bayesian inference scheme for scaled Brownian motion, and investigate its performance on synthetic data for parameter estimation and model selection in a combined inference with fractional Brownian motion. We include the possibility of measurement noise in both models. We find that for trajectories of a few hundred time points the procedure is able to resolve well the true model and parameters. Using the prior of the synthetic data generation process also for the inference, the approach is optimal based on decision theory. We include a comparison with inference using a prior different from the data generating one.},
  language  = {en}
}
@phdthesis{Thapa2020,
  author    = {Thapa, Samudrajit},
  title     = {Deciphering anomalous diffusion in complex systems using {Bayesian} inference and large deviation theory},
  pages     = {xx, 186},
  year      = {2020},
  abstract  = {The development of methods such as super-resolution microscopy (Nobel prize in Chemistry, 2014) and multi-scale computer modelling (Nobel prize in Chemistry, 2013) have provided scientists with powerful tools to study microscopic systems. Sub-micron particles or even fluorescently labelled single molecules can now be tracked for long times in a variety of systems such as living cells, biological membranes, colloidal solutions etc. at spatial and temporal resolutions previously inaccessible. Parallel to such single-particle tracking experiments, super-computing techniques enable simulations of large atomistic or coarse-grained systems such as biologically relevant membranes or proteins from picoseconds to seconds, generating large volume of data. These have led to an unprecedented rise in the number of reported cases of anomalous diffusion wherein the characteristic features of Brownian motion---namely linear growth of the mean squared displacement with time and the Gaussian form of the probability density function (PDF) to find a particle at a given position at some fixed time---are routinely violated. This presents a big challenge in identifying the underlying stochastic process and also estimating the corresponding parameters of the process to completely describe the observed behaviour. Finding the correct physical mechanism which leads to the observed dynamics is of paramount importance, for example, to understand the first-arrival time of transcription factors which govern gene regulation, or the survival probability of a pathogen in a biological cell post drug administration. Statistical Physics provides useful methods that can be applied to extract such vital information. This cumulative dissertation, based on five publications, focuses on the development, implementation and application of such tools with special emphasis on Bayesian inference and large deviation theory.
Together with the implementation of Bayesian model comparison and parameter estimation methods for models of diffusion, complementary tools are developed based on different observables and large deviation theory to classify stochastic processes and gather pivotal information. Bayesian analysis of the data of micron-sized particles traced in mucin hydrogels at different pH conditions unveiled several interesting features and we gained insights into, for example, how in going from basic to acidic pH, the hydrogel becomes more heterogeneous and phase separation can set in, leading to observed non-ergodicity (non-equivalence of time and ensemble averages) and non-Gaussian PDF. With large deviation theory based analysis we could detect, for instance, non-Gaussianity in seeming Brownian diffusion of beads in aqueous solution, anisotropic motion of the beads in mucin at neutral pH conditions, and short-time correlations in climate data. Thus through the application of the developed methods to biological and meteorological datasets crucial information is garnered about the underlying stochastic processes and significant insights are obtained in understanding the physical nature of these systems.},
  language  = {en}
}
@article{SeeligRabeMalemShinitskietal.2020,
  author    = {Seelig, Stefan A. and Rabe, Maximilian Michael and Malem-Shinitski, Noa and Risse, Sarah and Reich, Sebastian and Engbert, Ralf},
  title     = {{Bayesian} parameter estimation for the {SWIFT} model of eye-movement control during reading},
  journal   = {Journal of Mathematical Psychology},
  volume    = {95},
  pages     = {32},
  year      = {2020},
  publisher = {Elsevier},
  address   = {San Diego},
  issn      = {0022-2496},
  doi       = {10.1016/j.jmp.2019.102313},
  abstract  = {Process-oriented theories of cognition must be evaluated against time-ordered observations. Here we present a representative example for data assimilation of the SWIFT model, a dynamical model of the control of fixation positions and fixation durations during natural reading of single sentences. First, we develop and test an approximate likelihood function of the model, which is a combination of a spatial, pseudo-marginal likelihood and a temporal likelihood obtained by probability density approximation. Second, we implement a Bayesian approach to parameter inference using an adaptive Markov chain Monte Carlo procedure. Our results indicate that model parameters can be estimated reliably for individual subjects. We conclude that approximative Bayesian inference represents a considerable step forward for computational models of eye-movement control, where modeling of individual data on the basis of process-based dynamic models has not been possible so far.},
  language  = {en}
}
@article{SchuettHarmelingMackeetal.2016,
  author    = {Sch{\"u}tt, Heiko Herbert and Harmeling, Stefan and Macke, Jakob H. and Wichmann, Felix A.},
  title     = {Painfree and accurate {Bayesian} estimation of psychometric functions for (potentially) overdispersed data},
  journal   = {Vision Research},
  volume    = {122},
  pages     = {105--123},
  year      = {2016},
  publisher = {Elsevier},
  address   = {Oxford},
  issn      = {0042-6989},
  doi       = {10.1016/j.visres.2016.02.002},
  abstract  = {The psychometric function describes how an experimental variable, such as stimulus strength, influences the behaviour of an observer. Estimation of psychometric functions from experimental data plays a central role in fields such as psychophysics, experimental psychology and in the behavioural neurosciences. Experimental data may exhibit substantial overdispersion, which may result from non-stationarity in the behaviour of observers. Here we extend the standard binomial model which is typically used for psychometric function estimation to a beta-binomial model. We show that the use of the beta-binomial model makes it possible to determine accurate credible intervals even in data which exhibit substantial overdispersion. This goes beyond classical measures for overdispersion goodness-of-fit which can detect overdispersion but provide no method to do correct inference for overdispersed data. We use Bayesian inference methods for estimating the posterior distribution of the parameters of the psychometric function. Unlike previous Bayesian psychometric inference methods our software implementation-psignifit 4 performs numerical integration of the posterior within automatically determined bounds. This avoids the use of Markov chain Monte Carlo (MCMC) methods typically requiring expert knowledge. Extensive numerical tests show the validity of the approach and we discuss implications of overdispersion for experimental design. A comprehensive MATLAB toolbox implementing the method is freely available; a python implementation providing the basic capabilities is also available. (C) 2016 The Authors. Published by Elsevier Ltd.},
  language  = {en}
}
@article{SchadVasishth2022,
  author    = {Schad, Daniel and Vasishth, Shravan},
  title     = {The posterior probability of a null hypothesis given a statistically significant result},
  journal   = {The Quantitative Methods for Psychology},
  volume    = {18},
  number    = {2},
  pages     = {130--141},
  year      = {2022},
  publisher = {University of Montreal, Department of Psychology},
  address   = {Montreal},
  issn      = {1913-4126},
  doi       = {10.20982/tqmp.18.2.p011},
  abstract  = {When researchers carry out a null hypothesis significance test, it is tempting to assume that a statistically significant result lowers Prob(H0), the probability of the null hypothesis being true. Technically, such a statement is meaningless for various reasons: e.g., the null hypothesis does not have a probability associated with it. However, it is possible to relax certain assumptions to compute the posterior probability Prob(H0) under repeated sampling. We show in a step-by-step guide that the intuitively appealing belief, that Prob(H0) is low when significant results have been obtained under repeated sampling, is in general incorrect and depends greatly on: (a) the prior probability of the null being true; (b) type-I error rate, (c) type-II error rate, and (d) replication of a result. Through step-by-step simulations using open-source code in the R System of Statistical Computing, we show that uncertainty about the null hypothesis being true often remains high despite a significant result. To help the reader develop intuitions about this common misconception, we provide a Shiny app (https://danielschad.shinyapps.io/probnull/). We expect that this tutorial will help researchers better understand and judge results from null hypothesis significance tests.},
  language  = {en}
}
@article{SavoyHesse2019,
  author    = {Savoy, Heather and He{\ss}e, Falk},
  title     = {Dimension reduction for integrating data series in {Bayesian} inversion of geostatistical models},
  journal   = {Stochastic Environmental Research and Risk Assessment},
  volume    = {33},
  number    = {7},
  pages     = {1327--1344},
  year      = {2019},
  publisher = {Springer},
  address   = {New York},
  issn      = {1436-3240},
  doi       = {10.1007/s00477-019-01697-9},
  abstract  = {This study explores methods with which multidimensional data, e.g. time series, can be effectively incorporated into a Bayesian framework for inferring geostatistical parameters. Such series are difficult to use directly in the likelihood estimation procedure due to their high dimensionality; thus, a dimension reduction approach is taken to utilize these measurements in the inference. Two synthetic scenarios from hydrology are explored in which pumping drawdown and concentration breakthrough curves are used to infer the global mean of a log-normally distributed hydraulic conductivity field. Both cases pursue the use of a parametric model to represent the shape of the observed time series with physically-interpretable parameters (e.g. the time and magnitude of a concentration peak), which is compared to subsets of the observations with similar dimensionality. The results from both scenarios highlight the effectiveness for the shape-matching models to reduce dimensionality from 100+ dimensions down to less than five. The models outperform the alternative subset method, especially when the observations are noisy. This approach to incorporating time series observations in the Bayesian framework for inferring geostatistical parameters allows for high-dimensional observations to be faithfully represented in lower-dimensional space for the non-parametric likelihood estimation procedure, which increases the applicability of the framework to more observation types. Although the scenarios are both from hydrogeology, the methodology is general in that no assumptions are made about the subject domain. Any application that requires the inference of geostatistical parameters using series in either time or space can use the approach described in this paper.},
  language  = {en}
}
@article{RosenbaumRaatzWeithoffetal.2019,
  author    = {Rosenbaum, Benjamin and Raatz, Michael and Weithoff, Guntram and Fussmann, Gregor F. and Gaedke, Ursula},
  title     = {Estimating parameters from multiple time series of population dynamics using {Bayesian} inference},
  journal   = {Frontiers in Ecology and Evolution},
  volume    = {6},
  pages     = {14},
  year      = {2019},
  publisher = {Frontiers Research Foundation},
  address   = {Lausanne},
  issn      = {2296-701X},
  doi       = {10.3389/fevo.2018.00234},
  abstract  = {Empirical time series of interacting entities, e.g., species abundances, are highly useful to study ecological mechanisms. Mathematical models are valuable tools to further elucidate those mechanisms and underlying processes. However, obtaining an agreement between model predictions and experimental observations remains a demanding task. As models always abstract from reality one parameter often summarizes several properties. Parameter measurements are performed in additional experiments independent of the ones delivering the time series. Transferring these parameter values to different settings may result in incorrect parametrizations. On top of that, the properties of organisms and thus the respective parameter values may vary considerably. These issues limit the use of a priori model parametrizations. In this study, we present a method suited for a direct estimation of model parameters and their variability from experimental time series data. We combine numerical simulations of a continuous-time dynamical population model with Bayesian inference, using a hierarchical framework that allows for variability of individual parameters. The method is applied to a comprehensive set of time series from a laboratory predator-prey system that features both steady states and cyclic population dynamics. Our model predictions are able to reproduce both steady states and cyclic dynamics of the data. Additionally to the direct estimates of the parameter values, the Bayesian approach also provides their uncertainties. We found that fitting cyclic population dynamics, which contain more information on the process rates than steady states, yields more precise parameter estimates. We detected significant variability among parameters of different time series and identified the variation in the maximum growth rate of the prey as a source for the transition from steady states to cyclic dynamics.
By lending more flexibility to the model, our approach facilitates parametrizations and shows more easily which patterns in time series can be explained also by simple models. Applying Bayesian inference and dynamical population models in conjunction may help to quantify the profound variability in organismal properties in nature.},
  language  = {en}
}
@article{Reich2013,
  author    = {Reich, Sebastian},
  title     = {A nonparametric ensemble transform method for {Bayesian} inference},
  journal   = {SIAM Journal on Scientific Computing},
  volume    = {35},
  number    = {4},
  pages     = {A2013--A2024},
  year      = {2013},
  publisher = {Society for Industrial and Applied Mathematics},
  address   = {Philadelphia},
  issn      = {1064-8275},
  doi       = {10.1137/130907367},
  abstract  = {Many applications, such as intermittent data assimilation, lead to a recursive application of Bayesian inference within a Monte Carlo context. Popular data assimilation algorithms include sequential Monte Carlo methods and ensemble Kalman filters (EnKFs). These methods differ in the way Bayesian inference is implemented. Sequential Monte Carlo methods rely on importance sampling combined with a resampling step, while EnKFs utilize a linear transformation of Monte Carlo samples based on the classic Kalman filter. While EnKFs have proven to be quite robust even for small ensemble sizes, they are not consistent since their derivation relies on a linear regression ansatz. In this paper, we propose another transform method, which does not rely on any a priori assumptions on the underlying prior and posterior distributions. The new method is based on solving an optimal transportation problem for discrete random variables.},
  language  = {en}
}
@article{RabeChandraKruegeletal.2021,
  author    = {Rabe, Maximilian Michael and Chandra, Johan and Kr{\"u}gel, Andr{\'e} and Seelig, Stefan A. and Vasishth, Shravan and Engbert, Ralf},
  title     = {A {Bayesian} approach to dynamical modeling of eye-movement control in reading of normal, mirrored, and scrambled texts},
  journal   = {Psychological Review},
  volume    = {128},
  number    = {5},
  pages     = {803--823},
  year      = {2021},
  publisher = {American Psychological Association},
  address   = {Washington},
  issn      = {0033-295X},
  doi       = {10.1037/rev0000268},
  abstract  = {In eye-movement control during reading, advanced process-oriented models have been developed to reproduce behavioral data. So far, model complexity and large numbers of model parameters prevented rigorous statistical inference and modeling of interindividual differences. Here we propose a Bayesian approach to both problems for one representative computational model of sentence reading (SWIFT; Engbert et al., Psychological Review, 112, 2005, pp. 777-813). We used experimental data from 36 subjects who read the text in a normal and one of four manipulated text layouts (e.g., mirrored and scrambled letters). The SWIFT model was fitted to subjects and experimental conditions individually to investigate between-subject variability. Based on posterior distributions of model parameters, fixation probabilities and durations are reliably recovered from simulated data and reproduced for withheld empirical data, at both the experimental condition and subject levels. A subsequent statistical analysis of model parameters across reading conditions generates model-driven explanations for observable effects between conditions.},
  language  = {en}
}
@article{MolkenthinDonnerReichetal.2022,
  author    = {Molkenthin, Christian and Donner, Christian and Reich, Sebastian and Z{\"o}ller, Gert and Hainzl, Sebastian and Holschneider, Matthias and Opper, Manfred},
  title     = {{GP-ETAS}: semiparametric {Bayesian} inference for the spatio-temporal epidemic type aftershock sequence model},
  journal   = {Statistics and Computing},
  volume    = {32},
  number    = {2},
  pages     = {25},
  year      = {2022},
  publisher = {Springer},
  address   = {Dordrecht},
  issn      = {0960-3174},
  doi       = {10.1007/s11222-022-10085-3},
  abstract  = {The spatio-temporal epidemic type aftershock sequence (ETAS) model is widely used to describe the self-exciting nature of earthquake occurrences. While traditional inference methods provide only point estimates of the model parameters, we aim at a fully Bayesian treatment of model inference, allowing naturally to incorporate prior knowledge and uncertainty quantification of the resulting estimates. Therefore, we introduce a highly flexible, non-parametric representation for the spatially varying ETAS background intensity through a Gaussian process (GP) prior. Combined with classical triggering functions this results in a new model formulation, namely the GP-ETAS model. We enable tractable and efficient Gibbs sampling by deriving an augmented form of the GP-ETAS inference problem. This novel sampling approach allows us to assess the posterior model variables conditioned on observed earthquake catalogues, i.e., the spatial background intensity and the parameters of the triggering function. Empirical results on two synthetic data sets indicate that GP-ETAS outperforms standard models and thus demonstrate the predictive power for observed earthquake catalogues including uncertainty quantification for the estimated parameters. Finally, a case study for the l'Aquila region, Italy, with the devastating event on 6 April 2009, is presented.},
  language  = {en}
}