% Bibliography database: six records (five journal articles, one proceedings paper).
% Layout convention: one field per line, page ranges written with a bare double hyphen,
% and only case-sensitive words brace-protected inside titles.

@article{VasishthGelman2021,
  author    = {Vasishth, Shravan and Gelman, Andrew},
  title     = {How to embrace variation and accept uncertainty in linguistic and psycholinguistic data analysis},
  journal   = {Linguistics : an interdisciplinary journal of the language sciences},
  volume    = {59},
  number    = {5},
  pages     = {1311--1342},
  year      = {2021},
  publisher = {De Gruyter Mouton},
  address   = {Berlin},
  issn      = {0024-3949},
  doi       = {10.1515/ling-2019-0051},
  language  = {en},
  abstract  = {The use of statistical inference in linguistics and related areas like psychology
               typically involves a binary decision: either reject or accept some null hypothesis
               using statistical significance testing. When statistical power is low, this
               frequentist data-analytic approach breaks down: null results are uninformative, and
               effect size estimates associated with significant results are overestimated. Using an
               example from psycholinguistics, several alternative approaches are demonstrated for
               reporting inconsistencies between the data and a theoretical prediction. The key here
               is to focus on committing to a falsifiable prediction, on quantifying uncertainty
               statistically, and learning to accept the fact that - in almost all practical data
               analysis situations - we can only draw uncertain conclusions from data, regardless of
               whether we manage to obtain statistical significance or not. A focus on uncertainty
               quantification is likely to lead to fewer excessively bold claims that, on closer
               investigation, may turn out to be not supported by the data.},
}

% Proceedings contribution: the volume title lives in booktitle (it was previously stored
% in journal/series on a misc-type record).
% NOTE(review): pages = {12} looks like a page count rather than a page range - confirm
% against the publisher record before relying on it.
@inproceedings{RheinwaltBookhagen2018,
  author    = {Rheinwalt, Aljoscha and Bookhagen, Bodo},
  title     = {Network-based flow accumulation for point clouds},
  booktitle = {Remote Sensing for Agriculture, Ecosystems, and Hydrology XX},
  volume    = {10783},
  pages     = {12},
  year      = {2018},
  publisher = {SPIE-INT Society of Photo-Optical Instrumentation Engineers},
  address   = {Bellingham},
  isbn      = {978-1-5106-2150-3},
  issn      = {0277-786X},
  doi       = {10.1117/12.2318424},
  language  = {en},
  abstract  = {Point clouds provide high-resolution topographic data which is often classified into
               bare-earth, vegetation, and building points and then filtered and aggregated to
               gridded Digital Elevation Models (DEMs) or Digital Terrain Models (DTMs). Based on
               these equally-spaced grids flow-accumulation algorithms are applied to describe the
               hydrologic and geomorphologic mass transport on the surface. In this contribution, we
               propose a stochastic point-cloud filtering that, together with a spatial bootstrap
               sampling, allows for a flow accumulation directly on point clouds using Facet-Flow
               Networks (FFN). Additionally, this provides a framework for the quantification of
               uncertainties in point-cloud derived metrics such as Specific Catchment Area (SCA)
               even though the flow accumulation itself is deterministic.},
}

% NOTE(review): three co-authors are recorded with initials only; expand to full given
% names if the publisher record provides them.
@article{PathirajaMoradkhaniMarshalletal.2018,
  author    = {Pathiraja, Sahani Darschika and Moradkhani, H. and Marshall, L. and Sharma, Ashish and Geenens, G.},
  title     = {Data-driven model uncertainty estimation in hydrologic data assimilation},
  journal   = {Water resources research : WRR / American Geophysical Union},
  volume    = {54},
  number    = {2},
  pages     = {1252--1280},
  year      = {2018},
  publisher = {American Geophysical Union},
  address   = {Washington},
  issn      = {0043-1397},
  doi       = {10.1002/2018WR022627},
  language  = {en},
  abstract  = {The increasing availability of earth observations necessitates mathematical methods to
               optimally combine such data with hydrologic models. Several algorithms exist for such
               purposes, under the umbrella of data assimilation (DA). However, DA methods are often
               applied in a suboptimal fashion for complex real-world problems, due largely to
               several practical implementation issues. One such issue is error characterization,
               which is known to be critical for a successful assimilation. Mischaracterized errors
               lead to suboptimal forecasts, and in the worst case, to degraded estimates even
               compared to the no assimilation case. Model uncertainty characterization has received
               little attention relative to other aspects of DA science. Traditional methods rely on
               subjective, ad hoc tuning factors or parametric distribution assumptions that may not
               always be applicable. We propose a novel data-driven approach (named SDMU) to model
               uncertainty characterization for DA studies where (1) the system states are partially
               observed and (2) minimal prior knowledge of the model error processes is available,
               except that the errors display state dependence. It includes an approach for
               estimating the uncertainty in hidden model states, with the end goal of improving
               predictions of observed variables. The SDMU is therefore suited to DA studies where
               the observed variables are of primary interest. Its efficacy is demonstrated through a
               synthetic case study with low-dimensional chaotic dynamics and a real hydrologic
               experiment for one-day-ahead streamflow forecasting. In both experiments, the proposed
               method leads to substantial improvements in the hidden states and observed system
               outputs over a standard method involving perturbation with Gaussian noise.},
}

% NOTE(review): pages = {40} looks like a page count rather than a page range - confirm
% against the publisher record before relying on it.
@article{HuangHuangReichetal.2022,
  author    = {Huang, Daniel Zhengyu and Huang, Jiaoyang and Reich, Sebastian and Stuart, Andrew M.},
  title     = {Efficient derivative-free {Bayesian} inference for large-scale inverse problems},
  journal   = {Inverse problems : an international journal of inverse problems, inverse methods and computerised inversion of data},
  volume    = {38},
  number    = {12},
  pages     = {40},
  year      = {2022},
  publisher = {IOP Publ. Ltd.},
  address   = {Bristol},
  issn      = {0266-5611},
  doi       = {10.1088/1361-6420/ac99fa},
  language  = {en},
  abstract  = {We consider Bayesian inference for large-scale inverse problems, where computational
               challenges arise from the need for repeated evaluations of an expensive forward model.
               This renders most Markov chain Monte Carlo approaches infeasible, since they typically
               require $O(10^4)$ model runs, or more. Moreover, the forward model is often given as a
               black box or is impractical to differentiate. Therefore derivative-free algorithms are
               highly desirable. We propose a framework, which is built on Kalman methodology, to
               efficiently perform Bayesian inference in such inverse problems. The basic method is
               based on an approximation of the filtering distribution of a novel mean-field
               dynamical system, into which the inverse problem is embedded as an observation
               operator. Theoretical properties are established for linear inverse problems,
               demonstrating that the desired Bayesian posterior is given by the steady state of the
               law of the filtering distribution of the mean-field dynamical system, and proving
               exponential convergence to it. This suggests that, for nonlinear problems which are
               close to Gaussian, sequentially computing this law provides the basis for efficient
               iterative methods to approximate the Bayesian posterior. Ensemble methods are applied
               to obtain interacting particle system approximations of the filtering distribution of
               the mean-field model; and practical strategies to further reduce the computational and
               memory cost of the methodology are presented, including low-rank approximation and a
               bi-fidelity approach. The effectiveness of the framework is demonstrated in several
               numerical experiments, including proof-of-concept linear/nonlinear examples and two
               large-scale applications: learning of permeability parameters in subsurface flow; and
               learning subgrid-scale parameters in a global climate model. Moreover, the stochastic
               ensemble Kalman filter and various ensemble square-root Kalman filters are all
               employed and are compared numerically. The results demonstrate that the proposed
               method, based on exponential convergence to the filtering distribution of a mean-field
               dynamical system, is competitive with pre-existing Kalman-based methods for inverse
               problems.},
}

@article{GaidzikPathirajaSaalfeldetal.2020,
  author    = {Gaidzik, Franziska and Pathiraja, Sahani Darschika and Saalfeld, Sylvia and Stucht, Daniel and Speck, Oliver and Thevenin, Dominique and Janiga, Gabor},
  title     = {Hemodynamic data assimilation in a subject-specific circle of {Willis} geometry},
  journal   = {Clinical Neuroradiology},
  volume    = {31},
  number    = {3},
  pages     = {643--651},
  year      = {2020},
  publisher = {Springer},
  address   = {Heidelberg},
  issn      = {1869-1439},
  doi       = {10.1007/s00062-020-00959-2},
  language  = {en},
  abstract  = {Purpose The anatomy of the circle of Willis (CoW), the brain's main arterial blood
               supply system, strongly differs between individuals, resulting in highly variable flow
               fields and intracranial vascularization patterns. To predict subject-specific
               hemodynamics with high certainty, we propose a data assimilation (DA) approach that
               merges fully 4D phase-contrast magnetic resonance imaging (PC-MRI) data with a
               numerical model in the form of computational fluid dynamics (CFD) simulations. Methods
               To the best of our knowledge, this study is the first to provide a transient state
               estimate for the three-dimensional velocity field in a subject-specific CoW geometry
               using DA. High-resolution velocity state estimates are obtained using the local
               ensemble transform Kalman filter (LETKF). Results Quantitative evaluation shows a
               considerable reduction (up to 90\%) in the uncertainty of the velocity field state
               estimate after the data assimilation step. Velocity values in vessel areas that are
               below the resolution of the PC-MRI data (e.g., in posterior communicating arteries)
               are provided. Furthermore, the uncertainty of the analysis-based wall shear stress
               distribution is reduced by a factor of 2 for the data assimilation approach when
               compared to the CFD model alone. Conclusion This study demonstrates the potential of
               data assimilation to provide detailed information on vascular flow, and to reduce the
               uncertainty in such estimates by combining various sources of data in a statistically
               appropriate fashion.},
}

@article{CarpentierKim2018,
  author    = {Carpentier, Alexandra and Kim, Arlene K. H.},
  title     = {An iterative hard thresholding estimator for low rank matrix recovery with explicit limiting distribution},
  journal   = {Statistica Sinica},
  volume    = {28},
  number    = {3},
  pages     = {1371--1393},
  year      = {2018},
  publisher = {Statistica Sinica, Institute of Statistical Science, Academia Sinica},
  address   = {Taipei},
  issn      = {1017-0405},
  doi       = {10.5705/ss.202016.0103},
  language  = {en},
  abstract  = {We consider the problem of low rank matrix recovery in a stochastically noisy
               high-dimensional setting. We propose a new estimator for the low rank matrix, based on
               the iterative hard thresholding method, that is computationally efficient and simple.
               We prove that our estimator is optimal in terms of the Frobenius risk and in terms of
               the entry-wise risk uniformly over any change of orthonormal basis, allowing us to
               provide the limiting distribution of the estimator. When the design is Gaussian, we
               prove that the entry-wise bias of the limiting distribution of the estimator is small,
               which is of interest for constructing tests and confidence sets for low-dimensional
               subsets of entries of the low rank matrix.},
}