@article{AcevedoDeWiljesReich2017, author = {Acevedo, Walter and De Wiljes, Jana and Reich, Sebastian}, title = {Second-order accurate ensemble transform particle filters}, series = {SIAM journal on scientific computing}, volume = {39}, journal = {SIAM journal on scientific computing}, number = {5}, publisher = {Society for Industrial and Applied Mathematics}, address = {Philadelphia}, issn = {1064-8275}, doi = {10.1137/16M1095184}, pages = {A1834 -- A1850}, year = {2017}, abstract = {Particle filters (also called sequential Monte Carlo methods) are widely used for state and parameter estimation problems in the context of nonlinear evolution equations. The recently proposed ensemble transform particle filter (ETPF) [S. Reich, SIAM J. Sci. Comput., 35, (2013), pp. A2013-A2014] replaces the resampling step of a standard particle filter by a linear transformation which allows for a hybridization of particle filters with ensemble Kalman filters and renders the resulting hybrid filters applicable to spatially extended systems. However, the linear transformation step is computationally expensive and leads to an underestimation of the ensemble spread for small and moderate ensemble sizes. Here we address both of these shortcomings by developing second order accurate extensions of the ETPF. These extensions allow one in particular to replace the exact solution of a linear transport problem by its Sinkhorn approximation. It is also demonstrated that the nonlinear ensemble transform filter arises as a special case of our general framework. We illustrate the performance of the second-order accurate filters for the chaotic Lorenz-63 and Lorenz-96 models and a dynamic scene-viewing model. The numerical results for the Lorenz-63 and Lorenz-96 models demonstrate that significant accuracy improvements can be achieved in comparison to a standard ensemble Kalman filter and the ETPF for small to moderate ensemble sizes. 
The numerical results for the scene-viewing model reveal, on the other hand, that second-order corrections can lead to statistically inconsistent samples from the posterior parameter distribution.}, language = {en} } @article{BernerTrauthHolschneider2022, author = {Berner, Nadine and Trauth, Martin H. and Holschneider, Matthias}, title = {Bayesian inference about Plio-Pleistocene climate transitions in Africa}, series = {Quaternary science reviews : the international multidisciplinary research and review journal}, volume = {277}, journal = {Quaternary science reviews : the international multidisciplinary research and review journal}, publisher = {Elsevier}, address = {Oxford}, issn = {0277-3791}, doi = {10.1016/j.quascirev.2021.107287}, pages = {12}, year = {2022}, abstract = {During the last 5 Ma the Earth's ocean-atmosphere system passed through several major transitions, many of which are discussed as possible triggers for human evolution. A classic in this context is the possible influence of the closure of the Panama Strait, the intensification of Northern Hemisphere Glaciation, a stepwise increase in aridity in Africa, and the first appearance of the genus Homo about 2.5 - 2.7 Ma ago. Apart from the fact that the correlation between these events does not necessarily imply causality, many attempts to establish a relationship between climate and evolution fail due to the challenge of precisely localizing an a priori unknown number of changes potentially underlying complex climate records. The kernel-based Bayesian inference approach applied here allows inferring the location, generic shape, and temporal scale of multiple transitions in established records of Plio-Pleistocene African climate. 
By defining a transparent probabilistic analysis strategy, we are able to identify conjoint changes occurring across the investigated terrigenous dust records from Ocean Drilling Programme (ODP) sites in the Atlantic Ocean (ODP 659), Arabian Sea (ODP 721/722) and Mediterranean Sea (ODP 967). The study indicates a two-step transition in the African climate proxy records at (2.35-2.10) Ma and (1.70-1.50) Ma that may be associated with the reorganization of the Hadley-Walker Circulation.}, language = {en} } @article{GarbunoInigoNueskenReich2020, author = {Garbuno-Inigo, Alfredo and N{\"u}sken, Nikolas and Reich, Sebastian}, title = {Affine invariant interacting Langevin dynamics for Bayesian inference}, series = {SIAM journal on applied dynamical systems}, volume = {19}, journal = {SIAM journal on applied dynamical systems}, number = {3}, publisher = {Society for Industrial and Applied Mathematics}, address = {Philadelphia}, issn = {1536-0040}, doi = {10.1137/19M1304891}, pages = {1633 -- 1658}, year = {2020}, abstract = {We propose a computational method (with acronym ALDI) for sampling from a given target distribution based on first-order (overdamped) Langevin dynamics which satisfies the property of affine invariance. The central idea of ALDI is to run an ensemble of particles with their empirical covariance serving as a preconditioner for their underlying Langevin dynamics. ALDI does not require taking the inverse or square root of the empirical covariance matrix, which enables application to high-dimensional sampling problems. The theoretical properties of ALDI are studied in terms of nondegeneracy and ergodicity. Furthermore, we study its connections to diffusion on Riemannian manifolds and Wasserstein gradient flows. Bayesian inference serves as a main application area for ALDI. In case of a forward problem with additive Gaussian measurement errors, ALDI allows for a gradient-free approximation in the spirit of the ensemble Kalman filter. 
A computational comparison between gradient-free and gradient-based ALDI is provided for a PDE constrained Bayesian inverse problem.}, language = {en} } @article{GianniotisSchnoerrMolkenthinetal.2016, author = {Gianniotis, Nikolaos and Schnoerr, Christoph and Molkenthin, Christian and Bora, Sanjay Singh}, title = {Approximate variational inference based on a finite sample of Gaussian latent variables}, series = {Pattern Analysis \& Applications}, volume = {19}, journal = {Pattern Analysis \& Applications}, publisher = {Springer}, address = {New York}, issn = {1433-7541}, doi = {10.1007/s10044-015-0496-9}, pages = {475 -- 485}, year = {2016}, abstract = {Variational methods are employed in situations where exact Bayesian inference becomes intractable due to the difficulty in performing certain integrals. Typically, variational methods postulate a tractable posterior and formulate a lower bound on the desired integral to be approximated, e.g. marginal likelihood. The lower bound is then optimised with respect to its free parameters, the so-called variational parameters. However, this is not always possible as for certain integrals it is very challenging (or tedious) to come up with a suitable lower bound. Here, we propose a simple scheme that overcomes some of the awkward cases where the usual variational treatment becomes difficult. The scheme relies on a rewriting of the lower bound on the model log-likelihood. 
We demonstrate the proposed scheme on a number of synthetic and real examples, as well as on a real geophysical model for which the standard variational approaches are inapplicable.}, language = {en} } @phdthesis{Makarava2012, author = {Makarava, Natallia}, title = {Bayesian estimation of self-similarity exponent}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-64099}, school = {Universit{\"a}t Potsdam}, year = {2012}, abstract = {Estimation of the self-similarity exponent has attracted growing interest in recent decades and became a research subject in various fields and disciplines. Real-world data exhibiting self-similar behavior and/or parametrized by self-similarity exponent (in particular Hurst exponent) have been collected in different fields ranging from finance and human sciences to hydrologic and traffic networks. Such rich classes of possible applications obligate researchers to investigate qualitatively new methods for estimation of the self-similarity exponent as well as identification of long-range dependencies (or long memory). In this thesis I present the Bayesian estimation of the Hurst exponent. In contrast to previous methods, the Bayesian approach allows the possibility to calculate the point estimator and confidence intervals at the same time, bringing significant advantages in data-analysis as discussed in this thesis. Moreover, it is also applicable to short data and unevenly sampled data, thus broadening the range of systems where the estimation of the Hurst exponent is possible. Taking into account that one of the substantial classes of great interest in modeling is the class of Gaussian self-similar processes, this thesis considers the realizations of the processes of fractional Brownian motion and fractional Gaussian noise. 
Additionally, applications to real-world data, such as the data of water level of the Nile River and fixational eye movements are also discussed.}, language = {en} } @phdthesis{Malchow2023, author = {Malchow, Anne-Kathleen}, title = {Developing an integrated platform for predicting niche and range dynamics}, doi = {10.25932/publishup-60273}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-602737}, school = {Universit{\"a}t Potsdam}, pages = {xiv, 169}, year = {2023}, abstract = {Species are adapted to the environment they live in. Today, most environments are subjected to rapid global changes induced by human activity, most prominently land cover and climate changes. Such transformations can cause adjustments or disruptions in various eco-evolutionary processes. The repercussions of this can appear at the population level as shifted ranges and altered abundance patterns. This is where global change effects on species are usually detected first. To understand how eco-evolutionary processes act and interact to generate patterns of range and abundance and how these processes themselves are influenced by environmental conditions, spatially-explicit models provide effective tools. They estimate a species' niche as the set of environmental conditions in which it can persist. However, the currently most commonly used models rely on static correlative associations that are established between a set of spatial predictors and observed species distributions. For this, they assume stationary conditions and are therefore unsuitable in contexts of global change. Better equipped are process-based models that explicitly implement algorithmic representations of eco-evolutionary mechanisms and evaluate their joint dynamics. These models have long been regarded as difficult to parameterise, but an increased data availability and improved methods for data integration lessen this challenge. 
Hence, the goal of this thesis is to further develop process-based models, integrate them into a complete modelling workflow, and provide the tools and guidance for their successful application. With my thesis, I presented an integrated platform for spatially-explicit eco-evolutionary modelling and provided a workflow for their inverse calibration to observational data. In the first chapter, I introduced RangeShiftR, a software tool that implements an individual-based modelling platform for the statistical programming language R. Its open-source licensing, extensive help pages and available tutorials make it accessible to a wide audience. In the second chapter, I demonstrated a comprehensive workflow for the specification, calibration and validation of RangeShiftR by the example of the red kite in Switzerland. The integration of heterogeneous data sources, such as literature and monitoring data, allowed to successfully calibrate the model. It was then used to make validated, spatio-temporal predictions of future red kite abundance. The presented workflow can be adopted to any study species if data is available. In the third chapter, I extended RangeShiftR to directly link demographic processes to climatic predictors. This allowed me to explore the climate-change responses of eight Swiss breeding birds in more detail. Specifically, the model could identify the most influential climatic predictors, delineate areas of projected demographic suitability, and attribute current population trends to contemporary climate change. My work shows that the application of complex, process-based models in conservation-relevant contexts is feasible, utilising available tools and data. Such models can be successfully calibrated and outperform other currently used modelling approaches in terms of predictive accuracy. Their projections can be used to predict future abundances or to assess alternative conservation scenarios. 
They further improve our mechanistic understanding of niche and range dynamics under climate change. However, only fully mechanistic models, that include all relevant processes, allow to precisely disentangle the effects of single processes on observed abundances. In this respect, the RangeShiftR model still has potential for further extensions that implement missing influential processes, such as species interactions. Dynamic, process-based models are needed to adequately model a dynamic reality. My work contributes towards the advancement, integration and dissemination of such models. This will facilitate numeric, model-based approaches for species assessments, generate ecological insights and strengthen the reliability of predictions on large spatial scales under changing conditions.}, language = {en} } @phdthesis{MalemShinitski2023, author = {Malem-Shinitski, Noa}, title = {Bayesian inference and modeling for point processes with applications from neuronal activity to scene viewing}, doi = {10.25932/publishup-61495}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-614952}, school = {Universit{\"a}t Potsdam}, pages = {vii, 129}, year = {2023}, abstract = {Point processes are a common methodology to model sets of events. From earthquakes to social media posts, from the arrival times of neuronal spikes to the timing of crimes, from stock prices to disease spreading -- these phenomena can be reduced to the occurrences of events concentrated in points. Often, these events happen one after the other defining a time--series. Models of point processes can be used to deepen our understanding of such events and for classification and prediction. Such models include an underlying random process that generates the events. This work uses Bayesian methodology to infer the underlying generative process from observed data. Our contribution is twofold -- we develop new models and new inference methods for these processes. 
We propose a model that extends the family of point processes where the occurrence of an event depends on the previous events. This family is known as Hawkes processes. Whereas in most existing models of such processes, past events are assumed to have only an excitatory effect on future events, we focus on the newly developed nonlinear Hawkes process, where past events could have excitatory and inhibitory effects. After defining the model, we present its inference method and apply it to data from different fields, among others, to neuronal activity. The second model described in the thesis concerns a specific instance of point processes --- the decision process underlying human gaze control. This process results in a series of fixated locations in an image. We developed a new model to describe this process, motivated by the known Exploration--Exploitation dilemma. Alongside the model, we present a Bayesian inference algorithm to infer the model parameters. Remaining in the realm of human scene viewing, we identify the lack of best practices for Bayesian inference in this field. We survey four popular algorithms and compare their performances for parameter inference in two scan path models. The novel models and inference algorithms presented in this dissertation enrich the understanding of point process data and allow us to uncover meaningful insights.}, language = {en} } @article{MalemShinitskiOjedaOpper2022, author = {Malem-Shinitski, Noa and Ojeda, Cesar and Opper, Manfred}, title = {Variational {Bayesian} inference for nonlinear {Hawkes} process with {Gaussian} process self-effects}, series = {Entropy}, volume = {24}, journal = {Entropy}, number = {3}, publisher = {MDPI}, address = {Basel}, issn = {1099-4300}, doi = {10.3390/e24030356}, pages = {22}, year = {2022}, abstract = {Traditionally, Hawkes processes are used to model time-continuous point processes with history dependence. 
Here, we propose an extended model where the self-effects are of both excitatory and inhibitory types and follow a Gaussian Process. Whereas previous work either relies on a less flexible parameterization of the model, or requires a large amount of data, our formulation allows for both a flexible model and learning when data are scarce. We continue the line of work of Bayesian inference for Hawkes processes, and derive an inference algorithm by performing inference on an aggregated sum of Gaussian Processes. Approximate Bayesian inference is achieved via data augmentation, and we describe a mean-field variational inference approach to learn the model parameters. To demonstrate the flexibility of the model we apply our methodology on data from different domains and compare it to previously reported results.}, language = {en} } @article{MarionMcInernyPageletal.2012, author = {Marion, Glenn and McInerny, Greg J. and Pagel, J{\"o}rn and Catterall, Stephen and Cook, Alex R. and Hartig, Florian and O'Hara, Robert B.}, title = {Parameter and uncertainty estimation for process-oriented population and distribution models: data, statistics and the niche}, series = {Journal of biogeography}, volume = {39}, journal = {Journal of biogeography}, number = {12}, publisher = {Wiley-Blackwell}, address = {Hoboken}, issn = {0305-0270}, doi = {10.1111/j.1365-2699.2012.02772.x}, pages = {2225 -- 2239}, year = {2012}, abstract = {The spatial distribution of a species is determined by dynamic processes such as reproduction, mortality and dispersal. Conventional static species distribution models (SDMs) do not incorporate these processes explicitly. This limits their applicability, particularly for non-equilibrium situations such as invasions or climate change. In this paper we show how dynamic SDMs can be formulated and fitted to data within a Bayesian framework. 
Our focus is on discrete state-space Markov process models which provide a flexible framework to account for stochasticity in key demographic processes, including dispersal, growth and competition. We show how to construct likelihood functions for such models (both discrete and continuous time versions) and how these can be combined with suitable observation models to conduct Bayesian parameter inference using computational techniques such as Markov chain Monte Carlo. We illustrate the current state-of-the-art with three contrasting examples using both simulated and empirical data. The use of simulated data allows the robustness of the methods to be tested with respect to deficiencies in both data and model. These examples show how mechanistic understanding of the processes that determine distribution and abundance can be combined with different sources of information at a range of spatial and temporal scales. Application of such techniques will enable more reliable inference and projections, e.g. under future climate change scenarios than is possible with purely correlative approaches. 
Conversely, confronting such process-oriented niche models with abundance and distribution data will test current understanding and may ultimately feedback to improve underlying ecological theory.}, language = {en} } @article{MillerSchwarz2011, author = {Miller, Jeff and Schwarz, Wolfgang}, title = {Aggregate and individual replication probability within an explicit model of the research process}, series = {Psychological methods}, volume = {16}, journal = {Psychological methods}, number = {3}, publisher = {American Psychological Association}, address = {Washington}, issn = {1082-989X}, doi = {10.1037/a0023347}, pages = {337 -- 360}, year = {2011}, abstract = {We study a model of the research process in which the true effect size, the replication jitter due to changes in experimental procedure, and the statistical error of effect size measurement are all normally distributed random variables. Within this model, we analyze the probability of successfully replicating an initial experimental result by obtaining either a statistically significant result in the same direction or any effect in that direction. We analyze both the probability of successfully replicating a particular experimental effect (i.e., the individual replication probability) and the average probability of successful replication across different studies within some research context (i.e., the aggregate replication probability), and we identify the conditions under which the latter can be approximated using the formulas of Killeen (2005a, 2007). We show how both of these probabilities depend on parameters of the research context that would rarely be known in practice. In addition, we show that the statistical uncertainty associated with the size of an initial observed effect would often prevent accurate estimation of the desired individual replication probability even if these research context parameters were known exactly. 
We conclude that accurate estimates of replication probability are generally unattainable.}, language = {en} }