% Journal article: machine-learning study of climatic and soil drivers of crop yield variability.
% NOTE(review): pages = {23} looks like a page count rather than a page range
% or article number; confirm against the publisher record before relying on it.
@article{LischeidWebberSommeretal.2022,
  author    = {Lischeid, Gunnar and Webber, Heidi and Sommer, Michael and Nendel, Claas and Ewert, Frank},
  title     = {Machine learning in crop yield modelling},
  journal   = {Agricultural and Forest Meteorology},
  volume    = {312},
  publisher = {Elsevier},
  address   = {Amsterdam},
  issn      = {0168-1923},
  doi       = {10.1016/j.agrformet.2021.108698},
  pages     = {23},
  year      = {2022},
  abstract  = {Provisioning a sufficient stable source of food requires sound knowledge about current and upcoming threats to agricultural production. To that end machine learning approaches were used to identify the prevailing climatic and soil hydrological drivers of spatial and temporal yield variability of four crops, comprising 40 years yield data each from 351 counties in Germany. Effects of progress in agricultural management and breeding were subtracted from the data prior the machine learning modelling by fitting smooth non-linear trends to the 95th percentiles of observed yield data. An extensive feature selection approach was followed then to identify the most relevant predictors out of a large set of candidate predictors, comprising various soil and meteorological data. Particular emphasis was placed on studying the uniqueness of identified key predictors. Random Forest and Support Vector Machine models yielded similar although not identical results, capturing between 50\% and 70\% of the spatial and temporal variance of silage maize, winter barley, winter rapeseed and winter wheat yield. Equally good performance could be achieved with different sets of predictors. Thus identification of the most reliable models could not be based on the outcome of the model study only but required expert's judgement. Relationships between drivers and response often exhibited optimum curves, especially for summer air temperature and precipitation. In contrast, soil moisture clearly proved less relevant compared to meteorological drivers.
In view of the expected climate change both excess precipitation and the excess heat effect deserve more attention in breeding as well as in crop modelling.},
  language  = {en},
}

% Journal article: the Comprior benchmark tool for feature selection approaches on gene expression data.
@article{Perscheid2021,
  author    = {Perscheid, Cindy},
  title     = {Comprior},
  journal   = {BMC Bioinformatics},
  volume    = {22},
  publisher = {Springer Nature},
  address   = {London},
  issn      = {1471-2105},
  doi       = {10.1186/s12859-021-04308-z},
  pages     = {1--15},
  year      = {2021},
  abstract  = {Background Reproducible benchmarking is important for assessing the effectiveness of novel feature selection approaches applied on gene expression data, especially for prior knowledge approaches that incorporate biological information from online knowledge bases. However, no full-fledged benchmarking system exists that is extensible, provides built-in feature selection approaches, and a comprehensive result assessment encompassing classification performance, robustness, and biological relevance. Moreover, the particular needs of prior knowledge feature selection approaches, i.e. uniform access to knowledge bases, are not addressed. As a consequence, prior knowledge approaches are not evaluated amongst each other, leaving open questions regarding their effectiveness. Results We present the Comprior benchmark tool, which facilitates the rapid development and effortless benchmarking of feature selection approaches, with a special focus on prior knowledge approaches. Comprior is extensible by custom approaches, offers built-in standard feature selection approaches, enables uniform access to multiple knowledge bases, and provides a customizable evaluation infrastructure to compare multiple feature selection approaches regarding their classification performance, robustness, runtime, and biological relevance.
Conclusion Comprior allows reproducible benchmarking especially of prior knowledge approaches, which facilitates their applicability and for the first time enables a comprehensive assessment of their effectiveness},
  language  = {en},
}

% Journal article: comparison of machine learning and feature selection methods for predicting the Kp index.
% "Kp" is brace-protected in the title so sentence-casing styles keep its capital.
@article{ZhelayskayaVasileShpritsetal.2019,
  author    = {Zhelayskaya, Irina S. and Vasile, Ruggero and Shprits, Yuri Y. and Stolle, Claudia and Matzka, J{\"u}rgen},
  title     = {Systematic Analysis of Machine Learning and Feature Selection Techniques for Prediction of the {Kp} Index},
  journal   = {Space Weather: The International Journal of Research and Applications},
  volume    = {17},
  number    = {10},
  publisher = {American Geophysical Union},
  address   = {Washington},
  issn      = {1542-7390},
  doi       = {10.1029/2019SW002271},
  pages     = {1461--1486},
  year      = {2019},
  abstract  = {The Kp index is a measure of the midlatitude global geomagnetic activity and represents short-term magnetic variations driven by solar wind plasma and interplanetary magnetic field. The Kp index is one of the most widely used indicators for space weather alerts and serves as input to various models, such as for the thermosphere and the radiation belts. It is therefore crucial to predict the Kp index accurately. Previous work in this area has mostly employed artificial neural networks to nowcast Kp, based their inferences on the recent history of Kp and on solar wind measurements at L1. In this study, we systematically test how different machine learning techniques perform on the task of nowcasting and forecasting Kp for prediction horizons of up to 12 hr. Additionally, we investigate different methods of machine learning and information theory for selecting the optimal inputs to a predictive model. We illustrate how these methods can be applied to select the most important inputs to a predictive model of Kp and to significantly reduce input dimensionality.
We compare our best performing models based on a reduced set of optimal inputs with the existing models of Kp, using different test intervals, and show how this selection can affect model performance.},
  language  = {en},
}