% Bibliography database: journal articles, one record per entry.
% Conventions used below: one field per line, brace-delimited values,
% LaTeX escapes for non-ASCII letters, proper nouns in titles protected
% with braces, page ranges written with a double hyphen.
% NOTE(review): a pages value that is a single small number (10, 13) looks
% like a page count taken from the exporting catalogue, not a page range --
% confirm and replace with the article number where one exists.

@article{VasishthGelman2021,
  author    = {Vasishth, Shravan and Gelman, Andrew},
  title     = {How to embrace variation and accept uncertainty in linguistic and psycholinguistic data analysis},
  journal   = {Linguistics : an interdisciplinary journal of the language sciences},
  volume    = {59},
  number    = {5},
  pages     = {1311--1342},
  year      = {2021},
  publisher = {De Gruyter Mouton},
  address   = {Berlin},
  issn      = {0024-3949},
  doi       = {10.1515/ling-2019-0051},
  language  = {en},
  abstract  = {The use of statistical inference in linguistics and related areas like psychology typically involves a binary decision: either reject or accept some null hypothesis using statistical significance testing. When statistical power is low, this frequentist data-analytic approach breaks down: null results are uninformative, and effect size estimates associated with significant results are overestimated. Using an example from psycholinguistics, several alternative approaches are demonstrated for reporting inconsistencies between the data and a theoretical prediction. The key here is to focus on committing to a falsifiable prediction, on quantifying uncertainty statistically, and learning to accept the fact that - in almost all practical data analysis situations - we can only draw uncertain conclusions from data, regardless of whether we manage to obtain statistical significance or not. A focus on uncertainty quantification is likely to lead to fewer excessively bold claims that, on closer investigation, may turn out to be not supported by the data.},
}

@article{SchmidtHesseAttingeretal.2020,
  author    = {Schmidt, Lennart and Hesse, Falk and Attinger, Sabine and Kumar, Rohini},
  title     = {Challenges in applying machine learning models for hydrological inference},
  journal   = {Water resources research},
  volume    = {56},
  number    = {5},
  pages     = {10},
  year      = {2020},
  publisher = {American Geophysical Union},
  address   = {Washington},
  issn      = {0043-1397},
  doi       = {10.1029/2019WR025924},
  language  = {en},
  abstract  = {Machine learning (ML) algorithms are being increasingly used in Earth and Environmental modeling studies owing to the ever-increasing availability of diverse data sets and computational resources as well as advancement in ML algorithms. Despite advances in their predictive accuracy, the usefulness of ML algorithms for inference remains elusive. In this study, we employ two popular ML algorithms, artificial neural networks and random forest, to analyze a large data set of flood events across Germany with the goals to analyze their predictive accuracy and their usability to provide insights to hydrologic system functioning. The results of the ML algorithms are contrasted against a parametric approach based on multiple linear regression. For analysis, we employ a model-agnostic framework named Permuted Feature Importance to derive the influence of models' predictors. This allows us to compare the results of different algorithms for the first time in the context of hydrology. Our main findings are that (1) the ML models achieve higher prediction accuracy than linear regression, (2) the results reflect basic hydrological principles, but (3) further inference is hindered by the heterogeneity of results across algorithms. Thus, we conclude that the problem of equifinality as known from classical hydrological modeling also exists for ML and severely hampers its potential for inference. To account for the observed problems, we propose that when employing ML for inference, this should be made by using multiple algorithms and multiple methods, of which the latter should be embedded in a cross-validation routine.},
}

% NOTE(review): the next record describes the same work as
% SchmidtHesseAttingeretal.2020 above (same authors, journal, volume, issue
% and abstract; it differs in the subtitle, publisher and author spelling).
% It originally reused that key, which BibTeX rejects as a repeated entry,
% so it now carries the suffix "b". Its doi and issn were copied from the
% record above. Merge the two records once the preferred metadata is chosen.

@article{SchmidtHesseAttingeretal.2020b,
  author    = {Schmidt, Lennart and He{\ss}e, Falk and Attinger, Sabine and Kumar, Rohini},
  title     = {Challenges in applying machine learning models for hydrological inference: a case study for flooding events across {Germany}},
  journal   = {Water Resources Research},
  volume    = {56},
  number    = {5},
  pages     = {10},
  year      = {2020},
  publisher = {John Wiley \& Sons, Inc.},
  address   = {New Jersey},
  issn      = {0043-1397},
  doi       = {10.1029/2019WR025924},
  language  = {en},
  abstract  = {Machine learning (ML) algorithms are being increasingly used in Earth and Environmental modeling studies owing to the ever-increasing availability of diverse data sets and computational resources as well as advancement in ML algorithms. Despite advances in their predictive accuracy, the usefulness of ML algorithms for inference remains elusive. In this study, we employ two popular ML algorithms, artificial neural networks and random forest, to analyze a large data set of flood events across Germany with the goals to analyze their predictive accuracy and their usability to provide insights to hydrologic system functioning. The results of the ML algorithms are contrasted against a parametric approach based on multiple linear regression. For analysis, we employ a model-agnostic framework named Permuted Feature Importance to derive the influence of models' predictors. This allows us to compare the results of different algorithms for the first time in the context of hydrology. Our main findings are that (1) the ML models achieve higher prediction accuracy than linear regression, (2) the results reflect basic hydrological principles, but (3) further inference is hindered by the heterogeneity of results across algorithms. Thus, we conclude that the problem of equifinality as known from classical hydrological modeling also exists for ML and severely hampers its potential for inference. To account for the observed problems, we propose that when employing ML for inference, this should be made by using multiple algorithms and multiple methods, of which the latter should be embedded in a cross-validation routine.},
}

% NOTE(review): in the abstract below the indirect evidential suffix was
% exported as "-mls"; it is restored here to -mI + s-cedilla, parallel to
% the direct form -DI. Confirm against the published abstract.

@article{ArslanBastiaanseFelser2015,
  author    = {Arslan, Se{\c{c}}kin and Bastiaanse, Roelien and Felser, Claudia},
  title     = {Looking at the evidence in visual world: eye-movements reveal how bilingual and monolingual {Turkish} speakers process grammatical evidentiality},
  journal   = {Frontiers in psychology},
  volume    = {6},
  pages     = {13},
  year      = {2015},
  publisher = {Frontiers Research Foundation},
  address   = {Lausanne},
  issn      = {1664-1078},
  doi       = {10.3389/fpsyg.2015.01387},
  language  = {en},
  abstract  = {This study presents pioneering data on how adult early bilinguals (heritage speakers) and late bilingual speakers of Turkish and German process grammatical evidentiality in a visual world setting in comparison to monolingual speakers of Turkish. Turkish marks evidentiality, the linguistic reference to information source, through inflectional affixes signaling either direct (-DI) or indirect (-mI{\c{s}}) evidentiality. We conducted an eyetracking-during-listening experiment where participants were given access to visual 'evidence' supporting the use of either a direct or indirect evidential form. The behavioral results indicate that the monolingual Turkish speakers comprehended direct and indirect evidential scenarios equally well. In contrast, both late and early bilinguals were less accurate and slower to respond to direct than to indirect evidentials. The behavioral results were also reflected in the proportions of looks data. That is, both late and early bilinguals fixated less frequently on the target picture in the direct than in the indirect evidential condition while the monolinguals showed no difference between these conditions. Taken together, our results indicate reduced sensitivity to the semantic and pragmatic function of direct evidential forms in both late and early bilingual speakers, suggesting a simplification of the Turkish evidentiality system in Turkish heritage grammars. We discuss our findings with regard to theories of incomplete acquisition and first language attrition.},
}