@article{SchudomaLarhlimiWalther2011, author = {Schudoma, Christian and Larhlimi, Abdelhalim and Walther, Dirk}, title = {The influence of the local sequence environment on RNA loop structures}, series = {RNA : a publication of the RNA Society}, volume = {17}, journal = {RNA : a publication of the RNA Society}, number = {7}, publisher = {Cold Spring Harbor Laboratory Press}, address = {Cold Spring Harbor, NY}, issn = {1355-8382}, doi = {10.1261/rna.2550211}, pages = {1247 -- 1257}, year = {2011}, abstract = {RNA folding is assumed to be a hierarchical process. The secondary structure of an RNA molecule, signified by base-pairing and stacking interactions between the paired bases, is formed first. Subsequently, the RNA molecule adopts an energetically favorable three-dimensional conformation in the structural space determined mainly by the rotational degrees of freedom associated with the backbone of regions of unpaired nucleotides (loops). To what extent the backbone conformation of RNA loops also results from interactions within the local sequence context or rather follows global optimization constraints alone has not been addressed yet. Because the majority of base stacking interactions are exerted locally, a critical influence of local sequence on local structure appears plausible. Thus, local loop structure ought to be predictable, at least in part, from the local sequence context alone. To test this hypothesis, we used Random Forests on a nonredundant data set of unpaired nucleotides extracted from 97 X-ray structures from the Protein Data Bank (PDB) to predict discrete backbone angle conformations given by the discretized eta/theta-pseudo-torsional space. Predictions on balanced sets with four to six conformational classes using local sequence information yielded average accuracies of up to 55\%, thus significantly better than expected by chance (17\%-25\%). Bases close to the central nucleotide appear to be most tightly linked to its conformation. Our results suggest that RNA loop structure does not only depend on long-range base-pairing interactions; instead, it appears that local sequence context exerts a significant influence on the formation of the local loop structure.}, language = {en} } @article{KibrikKhudyakovaDobrovetal.2016, author = {Kibrik, Andrej A. and Khudyakova, Mariya V. and Dobrov, Grigory B. and Linnik, Anastasia and Zalmanov, Dmitrij A.}, title = {Referential Choice}, series = {Frontiers in psychology}, volume = {7}, journal = {Frontiers in psychology}, publisher = {Frontiers Research Foundation}, address = {Lausanne}, issn = {1664-1078}, doi = {10.3389/fpsyg.2016.01429}, year = {2016}, abstract = {We report a study of referential choice in discourse production, understood as the choice between various types of referential devices, such as pronouns and full noun phrases. Our goal is to predict referential choice, and to explore to what extent such prediction is possible. Our approach to referential choice includes a cognitively informed theoretical component, corpus analysis, machine learning methods and experimentation with human participants. Machine learning algorithms make use of 25 factors, including referent's properties (such as animacy and protagonism), the distance between a referential expression and its antecedent, the antecedent's syntactic role, and so on. Having found the predictions of our algorithm to coincide with the original almost 90\% of the time, we hypothesized that fully accurate prediction is not possible because, in many situations, more than one referential option is available. This hypothesis was supported by an experimental study, in which participants answered questions about either the original text in the corpus, or about a text modified in accordance with the algorithm's prediction. Proportions of correct answers to these questions, as well as participants' rating of the questions' difficulty, suggested that divergences between the algorithm's prediction and the original referential device in the corpus occur overwhelmingly in situations where the referential choice is not categorical.}, language = {en} } @article{KibrikKhudyakovaDobrovetal.2016, author = {Kibrik, Andrej A. and Khudyakova, Mariya V. and Dobrov, Grigory B. and Linnik, Anastasia and Zalmanov, Dmitrij A.}, title = {Referential Choice: Predictability and Its Limits}, series = {Frontiers in psychology}, volume = {7}, journal = {Frontiers in psychology}, publisher = {Frontiers Research Foundation}, address = {Lausanne}, issn = {1664-1078}, doi = {10.3389/fpsyg.2016.01429}, pages = {9939 -- 9947}, year = {2016}, abstract = {We report a study of referential choice in discourse production, understood as the choice between various types of referential devices, such as pronouns and full noun phrases. Our goal is to predict referential choice, and to explore to what extent such prediction is possible. Our approach to referential choice includes a cognitively informed theoretical component, corpus analysis, machine learning methods and experimentation with human participants. Machine learning algorithms make use of 25 factors, including referent's properties (such as animacy and protagonism), the distance between a referential expression and its antecedent, the antecedent's syntactic role, and so on. Having found the predictions of our algorithm to coincide with the original almost 90\% of the time, we hypothesized that fully accurate prediction is not possible because, in many situations, more than one referential option is available. This hypothesis was supported by an experimental study, in which participants answered questions about either the original text in the corpus, or about a text modified in accordance with the algorithm's prediction. Proportions of correct answers to these questions, as well as participants' rating of the questions' difficulty, suggested that divergences between the algorithm's prediction and the original referential device in the corpus occur overwhelmingly in situations where the referential choice is not categorical.}, language = {en} } @article{SprengerErbanSeddigetal.2017, author = {Sprenger, Heike and Erban, Alexander and Seddig, Sylvia and Rudack, Katharina and Thalhammer, Anja and Le, Mai Q. and Walther, Dirk and Zuther, Ellen and Koehl, Karin I. and Kopka, Joachim and Hincha, Dirk K.}, title = {Metabolite and transcript markers for the prediction of potato drought tolerance}, series = {Plant Biotechnology Journal}, volume = {16}, journal = {Plant Biotechnology Journal}, number = {4}, publisher = {Wiley}, address = {Hoboken}, issn = {1467-7644}, doi = {10.1111/pbi.12840}, pages = {939 -- 950}, year = {2017}, abstract = {Potato (Solanum tuberosum L.) is one of the most important food crops worldwide. Current potato varieties are highly susceptible to drought stress. In view of global climate change, selection of cultivars with improved drought tolerance and high yield potential is of paramount importance. Drought tolerance breeding of potato is currently based on direct selection according to yield and phenotypic traits and requires multiple trials under drought conditions. Marker-assisted selection (MAS) is cheaper, faster and reduces classification errors caused by noncontrolled environmental effects. We analysed 31 potato cultivars grown under optimal and reduced water supply in six independent field trials. Drought tolerance was determined as tuber starch yield. Leaf samples from young plants were screened for preselected transcript and nontargeted metabolite abundance using qRT-PCR and GC-MS profiling, respectively. Transcript marker candidates were selected from a published RNA-Seq data set. A Random Forest machine learning approach extracted metabolite and transcript markers for drought tolerance prediction with low error rates of 6\% and 9\%, respectively. Moreover, by combining transcript and metabolite markers, the prediction error was reduced to 4.3\%. Feature selection from Random Forest models allowed model minimization, yielding a minimal combination of only 20 metabolite and transcript markers that were successfully tested for their reproducibility in 16 independent agronomic field trials. We demonstrate that a minimum combination of transcript and metabolite markers sampled at early cultivation stages predicts potato yield stability under drought largely independent of seasonal and regional agronomic conditions.}, language = {en} } @article{AyzelIzhitskiy2019, author = {Ayzel, Georgy and Izhitskiy, Alexander}, title = {Climate Change Impact Assessment on Freshwater Inflow into the Small Aral Sea}, series = {Water}, volume = {11}, journal = {Water}, number = {11}, publisher = {MDPI}, address = {Basel}, issn = {2073-4441}, doi = {10.3390/w11112377}, pages = {19}, year = {2019}, abstract = {During the last few decades, the rapid separation of the Small Aral Sea from the isolated basin has changed its hydrological and ecological conditions tremendously. In the present study, we developed and validated the hybrid model for the Syr Darya River basin based on a combination of state-of-the-art hydrological and machine learning models. Climate change impact on freshwater inflow into the Small Aral Sea for the projection period 2007-2099 has been quantified based on the developed hybrid model and bias corrected and downscaled meteorological projections simulated by four General Circulation Models (GCM) for each of three Representative Concentration Pathway scenarios (RCP). The developed hybrid model reliably simulates freshwater inflow for the historical period with a Nash-Sutcliffe efficiency of 0.72 and a Kling-Gupta efficiency of 0.77. Results of the climate change impact assessment showed that the freshwater inflow projections produced by different GCMs are misleading by providing contradictory results for the projection period. However, we identified that the relative runoff changes are expected to be more pronounced in the case of more aggressive RCP scenarios. The simulated projections of freshwater inflow provide a basis for further assessment of climate change impacts on hydrological and ecological conditions of the Small Aral Sea in the 21st Century.}, language = {en} } @article{FrommholdHeimBarabanovetal.2019, author = {Frommhold, Martin and Heim, Arend and Barabanov, Mikhail and Maier, Franziska and M{\"u}hle, Ralf-Udo and Smirenski, Sergei M. and Heim, Wieland}, title = {Breeding habitat and nest-site selection by an obligatory "nest-cleptoparasite", the Amur Falcon Falco amurensis}, series = {Ecology and evolution}, volume = {9}, journal = {Ecology and evolution}, number = {24}, publisher = {Wiley}, address = {Hoboken}, issn = {2045-7758}, doi = {10.1002/ece3.5878}, pages = {14430 -- 14441}, year = {2019}, abstract = {The selection of a nest site is crucial for successful reproduction of birds. Animals which re-use or occupy nest sites constructed by other species often have limited choice. Little is known about the criteria of nest-stealing species to choose suitable nesting sites and habitats. Here, we analyze breeding-site selection of an obligatory "nest-cleptoparasite", the Amur Falcon Falco amurensis. We collected data on nest sites at Muraviovka Park in the Russian Far East, where the species breeds exclusively in nests of the Eurasian Magpie Pica pica. We sampled 117 Eurasian Magpie nests, 38 of which were occupied by Amur Falcons. Nest-specific variables were assessed, and a recently developed habitat classification map was used to derive landscape metrics. We found that Amur Falcons chose a wide range of nesting sites, but significantly preferred nests with a domed roof. Breeding pairs of Eurasian Hobby Falco subbuteo and Eurasian Magpie were often found to breed near the nest in about the same distance as neighboring Amur Falcon pairs. Additionally, the occurrence of the species was positively associated with bare soil cover, forest cover, and shrub patches within their home range and negatively with the distance to wetlands. Areas of wetlands and fallow land might be used for foraging since Amur Falcons mostly depend on an insect diet. Additionally, we found that rarely burned habitats were preferred. Overall, the effect of landscape variables on the choice of actual nest sites appeared to be rather small. We used different classification methods to predict the probability of occurrence, of which the Random forest method showed the highest accuracy. The areas determined as suitable habitat showed a high concordance with the actual nest locations. We conclude that Amur Falcons prefer to occupy newly built (domed) nests to ensure high nest quality, as well as nests surrounded by available feeding habitats.}, language = {en} } @article{BornhorstNustedeFudickar2019, author = {Bornhorst, Julia and Nustede, Eike Jannik and Fudickar, Sebastian}, title = {Mass Surveilance of C. elegans-Smartphone-Based DIY Microscope and Machine-Learning-Based Approach for Worm Detection}, series = {Sensors}, volume = {19}, journal = {Sensors}, number = {6}, publisher = {MDPI}, address = {Basel}, issn = {1424-8220}, doi = {10.3390/s19061468}, pages = {14}, year = {2019}, abstract = {The nematode Caenorhabditis elegans (C. elegans) is often used as an alternative animal model due to several advantages such as morphological changes that can be seen directly under a microscope. Limitations of the model include the usage of expensive and cumbersome microscopes, and restrictions of the comprehensive use of C. elegans for toxicological trials. With the general applicability of the detection of C. elegans from microscope images via machine learning, as well as of smartphone-based microscopes, this article investigates the suitability of smartphone-based microscopy to detect C. elegans in a complete Petri dish. Thereby, the article introduces a smartphone-based microscope (including optics, lighting, and housing) for monitoring C. elegans and the corresponding classification via a trained Histogram of Oriented Gradients (HOG) feature-based Support Vector Machine for the automatic detection of C. elegans. Evaluation showed classification sensitivity of 0.90 and specificity of 0.85, and thereby confirms the general practicability of the chosen approach.}, language = {en} } @article{RyoJeschkeRilligetal.2020, author = {Ryo, Masahiro and Jeschke, Jonathan M. and Rillig, Matthias C. and Heger, Tina}, title = {Machine learning with the hierarchy-of-hypotheses (HoH) approach discovers novel pattern in studies on biological invasions}, series = {Research synthesis methods}, volume = {11}, journal = {Research synthesis methods}, number = {1}, publisher = {Wiley}, address = {Hoboken}, issn = {1759-2879}, doi = {10.1002/jrsm.1363}, pages = {66 -- 73}, year = {2020}, abstract = {Research synthesis on simple yet general hypotheses and ideas is challenging in scientific disciplines studying highly context-dependent systems such as medical, social, and biological sciences. This study shows that machine learning, equation-free statistical modeling of artificial intelligence, is a promising synthesis tool for discovering novel patterns and the source of controversy in a general hypothesis. We apply a decision tree algorithm, assuming that evidence from various contexts can be adequately integrated in a hierarchically nested structure. As a case study, we analyzed 163 articles that studied a prominent hypothesis in invasion biology, the enemy release hypothesis. We explored if any of the nine attributes that classify each study can differentiate conclusions as classification problem. Results corroborated that machine learning can be useful for research synthesis, as the algorithm could detect patterns that had been already focused in previous narrative reviews. Compared with the previous synthesis study that assessed the same evidence collection based on experts' judgement, the algorithm has newly proposed that the studies focusing on Asian regions mostly supported the hypothesis, suggesting that more detailed investigations in these regions can enhance our understanding of the hypothesis. We suggest that machine learning algorithms can be a promising synthesis tool especially where studies (a) reformulate a general hypothesis from different perspectives, (b) use different methods or variables, or (c) report insufficient information for conducting meta-analyses.}, language = {en} } @article{Doellner2020, author = {D{\"o}llner, J{\"u}rgen Roland Friedrich}, title = {Geospatial artificial intelligence}, series = {Journal of photogrammetry, remote sensing and geoinformation science : PFG : Photogrammetrie, Fernerkundung, Geoinformation}, volume = {88}, journal = {Journal of photogrammetry, remote sensing and geoinformation science : PFG : Photogrammetrie, Fernerkundung, Geoinformation}, number = {1}, publisher = {Springer International Publishing}, address = {Cham}, issn = {2512-2789}, doi = {10.1007/s41064-020-00102-3}, pages = {15 -- 24}, year = {2020}, abstract = {Artificial intelligence (AI) is changing fundamentally the way how IT solutions are implemented and operated across all application domains, including the geospatial domain. This contribution outlines AI-based techniques for 3D point clouds and geospatial digital twins as generic components of geospatial AI. First, we briefly reflect on the term "AI" and outline technology developments needed to apply AI to IT solutions, seen from a software engineering perspective. Next, we characterize 3D point clouds as key category of geodata and their role for creating the basis for geospatial digital twins; we explain the feasibility of machine learning (ML) and deep learning (DL) approaches for 3D point clouds. In particular, we argue that 3D point clouds can be seen as a corpus with similar properties as natural language corpora and formulate a "Naturalness Hypothesis" for 3D point clouds. In the main part, we introduce a workflow for interpreting 3D point clouds based on ML/DL approaches that derive domain-specific and application-specific semantics for 3D point clouds without having to create explicit spatial 3D models or explicit rule sets. Finally, examples are shown how ML/DL enables us to efficiently build and maintain base data for geospatial digital twins such as virtual 3D city models, indoor models, or building information models.}, language = {en} } @article{LevyMussackBrunneretal.2020, author = {Levy, Jessica and Mussack, Dominic and Brunner, Martin and Keller, Ulrich and Cardoso-Leite, Pedro and Fischbach, Antoine}, title = {Contrasting classical and machine learning approaches in the estimation of value-added scores in large-scale educational data}, series = {Frontiers in psychology}, volume = {11}, journal = {Frontiers in psychology}, publisher = {Frontiers Research Foundation}, address = {Lausanne}, issn = {1664-1078}, doi = {10.3389/fpsyg.2020.02190}, pages = {18}, year = {2020}, abstract = {There is no consensus on which statistical model estimates school value-added (VA) most accurately. To date, the two most common statistical models used for the calculation of VA scores are two classical methods: linear regression and multilevel models. These models have the advantage of being relatively transparent and thus understandable for most researchers and practitioners. However, these statistical models are bound to certain assumptions (e.g., linearity) that might limit their prediction accuracy. Machine learning methods, which have yielded spectacular results in numerous fields, may be a valuable alternative to these classical models. Although big data is not new in general, it is relatively new in the realm of social sciences and education. New types of data require new data analytical approaches. Such techniques have already evolved in fields with a long tradition in crunching big data (e.g., gene technology). The objective of the present paper is to competently apply these "imported" techniques to education data, more precisely VA scores, and assess when and how they can extend or replace the classical psychometrics toolbox. The different models include linear and non-linear methods and extend classical models with the most commonly used machine learning methods (i.e., random forest, neural networks, support vector machines, and boosting). We used representative data of 3,026 students in 153 schools who took part in the standardized achievement tests of the Luxembourg School Monitoring Program in grades 1 and 3. Multilevel models outperformed classical linear and polynomial regressions, as well as different machine learning models. However, it could be observed that across all schools, school VA scores from different model types correlated highly. Yet, the percentage of disagreements as compared to multilevel models was not trivial and real-life implications for individual schools may still be dramatic depending on the model type used. Implications of these results and possible ethical concerns regarding the use of machine learning methods for decision-making in education are discussed.}, language = {en} }