@article{KuehnHainzlDahmetal.2022, author = {K{\"u}hn, Daniela and Hainzl, Sebastian and Dahm, Torsten and Richter, Gudrun and Vera Rodriguez, Ismael}, title = {A review of source models to further the understanding of the seismicity of the Groningen field}, series = {Netherlands journal of geosciences : NJG}, volume = {101}, journal = {Netherlands journal of geosciences : NJG}, publisher = {Cambridge Univ. Press}, address = {Cambridge}, issn = {0016-7746}, doi = {10.1017/njg.2022.7}, pages = {12}, year = {2022}, abstract = {The occurrence of felt earthquakes due to gas production in Groningen has initiated numerous studies and model attempts to understand and quantify induced seismicity in this region. The whole bandwidth of available models spans the range from fully deterministic models to purely empirical and stochastic models. In this article, we summarise the most important model approaches, describing their main achievements and limitations. In addition, we discuss remaining open questions and potential future directions of development.}, language = {en} } @phdthesis{Chen2023, author = {Chen, Junchao}, title = {A self-adaptive resilient method for implementing and managing the high-reliability processing system}, doi = {10.25932/publishup-58313}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-583139}, school = {Universit{\"a}t Potsdam}, pages = {XXIII, 167}, year = {2023}, abstract = {As a result of CMOS scaling, radiation-induced Single-Event Effects (SEEs) in electronic circuits became a critical reliability issue for modern Integrated Circuits (ICs) operating under harsh radiation conditions. SEEs can be triggered in combinational or sequential logic by the impact of high-energy particles, leading to destructive or non-destructive faults, resulting in data corruption or even system failure. Typically, the SEE mitigation methods are deployed statically in processing architectures based on the worst-case radiation conditions, which is most of the time unnecessary and results in a resource overhead. Moreover, the space radiation conditions are dynamically changing, especially during Solar Particle Events (SPEs). The intensity of space radiation can differ over five orders of magnitude within a few hours or days, resulting in several orders of magnitude fault probability variation in ICs during SPEs. This thesis introduces a comprehensive approach for designing a self-adaptive fault resilient multiprocessing system to overcome the static mitigation overhead issue. This work mainly addresses the following topics: (1) Design of on-chip radiation particle monitor for real-time radiation environment detection, (2) Investigation of space environment predictor, as support for solar particle events forecast, (3) Dynamic mode configuration in the resilient multiprocessing system. Therefore, according to detected and predicted in-flight space radiation conditions, the target system can be configured to use no mitigation or low-overhead mitigation during non-critical periods of time. The redundant resources can be used to improve system performance or save power. On the other hand, during increased radiation activity periods, such as SPEs, the mitigation methods can be dynamically configured appropriately depending on the real-time space radiation environment, resulting in higher system reliability. Thus, a dynamic trade-off in the target system between reliability, performance and power consumption in real-time can be achieved. All results of this work are evaluated in a highly reliable quad-core multiprocessing system that allows the self-adaptive setting of optimal radiation mitigation mechanisms during run-time. Proposed methods can serve as a basis for establishing a comprehensive self-adaptive resilient system design process. Successful implementation of the proposed design in the quad-core multiprocessor shows its application perspective also in the other designs.}, language = {en} } @phdthesis{Floeter2005, author = {Fl{\"o}ter, Andr{\´e}}, title = {Analyzing biological expression data based on decision tree induction}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-6416}, school = {Universit{\"a}t Potsdam}, year = {2005}, abstract = {Modern biological analysis techniques supply scientists with various forms of data. One category of such data are the so called "expression data". These data indicate the quantities of biochemical compounds present in tissue samples. Recently, expression data can be generated at a high speed. This leads in turn to amounts of data no longer analysable by classical statistical techniques. Systems biology is the new field that focuses on the modelling of this information. At present, various methods are used for this purpose. One superordinate class of these meth­ods is machine learning. Methods of this kind had, until recently, predominantly been used for classification and prediction tasks. This neglected a powerful secondary benefit: the ability to induce interpretable models. Obtaining such models from data has become a key issue within Systems biology. Numerous approaches have been proposed and intensively discussed. This thesis focuses on the examination and exploitation of one basic technique: decision trees. The concept of comparing sets of decision trees is developed. This method offers the pos­sibility of identifying significant thresholds in continuous or discrete valued attributes through their corresponding set of decision trees. Finding significant thresholds in attributes is a means of identifying states in living organisms. Knowing about states is an invaluable clue to the un­derstanding of dynamic processes in organisms. Applied to metabolite concentration data, the proposed method was able to identify states which were not found with conventional techniques for threshold extraction. A second approach exploits the structure of sets of decision trees for the discovery of com­binatorial dependencies between attributes. Previous work on this issue has focused either on expensive computational methods or the interpretation of single decision trees ­ a very limited exploitation of the data. This has led to incomplete or unstable results. That is why a new method is developed that uses sets of decision trees to overcome these limitations. Both the introduced methods are available as software tools. They can be applied consecu­tively or separately. That way they make up a package of analytical tools that usefully supplement existing methods. By means of these tools, the newly introduced methods were able to confirm existing knowl­edge and to suggest interesting and new relationships between metabolites.}, subject = {Molekulare Bioinformatik}, language = {en} } @article{GhafarianWielandLuettschwageretal.2022, author = {Ghafarian, Fatemeh and Wieland, Ralf and L{\"u}ttschwager, Dietmar and Nendel, Claas}, title = {Application of extreme gradient boosting and Shapley Additive explanations to predict temperature regimes inside forests from standard open-field meteorological data}, series = {Environmental modelling \& software with environment data news}, volume = {156}, journal = {Environmental modelling \& software with environment data news}, publisher = {Elsevier}, address = {Oxford}, issn = {1364-8152}, doi = {10.1016/j.envsoft.2022.105466}, pages = {11}, year = {2022}, abstract = {Forest microclimate can buffer biotic responses to summer heat waves, which are expected to become more extreme under climate warming. Prediction of forest microclimate is limited because meteorological observation standards seldom include situations inside forests. We use eXtreme Gradient Boosting - a Machine Learning technique - to predict the microclimate of forest sites in Brandenburg, Germany, using seasonal data comprising weather features. The analysis was amended by applying a SHapley Additive explanation to show the interaction effect of variables and individualised feature attributions. We evaluate model performance in comparison to artificial neural networks, random forest, support vector machine, and multi-linear regression. After implementing a feature selection, an ensemble approach was applied to combine individual models for each forest and improve robustness over a given single prediction model. The resulting model can be applied to translate climate change scenarios into temperatures inside forests to assess temperature-related ecosystem services provided by forests.}, language = {en} } @phdthesis{Brill2022, author = {Brill, Fabio Alexander}, title = {Applications of machine learning and open geospatial data in flood risk modelling}, doi = {10.25932/publishup-55594}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-555943}, school = {Universit{\"a}t Potsdam}, pages = {xix, 124}, year = {2022}, abstract = {Der technologische Fortschritt erlaubt es, zunehmend komplexe Vorhersagemodelle auf Basis immer gr{\"o}ßerer Datens{\"a}tze zu produzieren. F{\"u}r das Risikomanagement von Naturgefahren sind eine Vielzahl von Modellen als Entscheidungsgrundlage notwendig, z.B. in der Auswertung von Beobachtungsdaten, f{\"u}r die Vorhersage von Gefahrenszenarien, oder zur statistischen Absch{\"a}tzung der zu erwartenden Sch{\"a}den. Es stellt sich also die Frage, inwiefern moderne Modellierungsans{\"a}tze wie das maschinelle Lernen oder Data-Mining in diesem Themenbereich sinnvoll eingesetzt werden k{\"o}nnen. Zus{\"a}tzlich ist im Hinblick auf die Datenverf{\"u}gbarkeit und -zug{\"a}nglichkeit ein Trend zur {\"O}ffnung (open data) zu beobachten. Thema dieser Arbeit ist daher, die M{\"o}glichkeiten und Grenzen des maschinellen Lernens und frei verf{\"u}gbarer Geodaten auf dem Gebiet der Hochwasserrisikomodellierung im weiteren Sinne zu untersuchen. Da dieses {\"u}bergeordnete Thema sehr breit ist, werden einzelne relevante Aspekte herausgearbeitet und detailliert betrachtet. Eine prominente Datenquelle im Bereich Hochwasser ist die satellitenbasierte Kartierung von {\"U}berflutungsfl{\"a}chen, die z.B. {\"u}ber den Copernicus Service der Europ{\"a}ischen Union frei zur Verf{\"u}gung gestellt werden. Große Hoffnungen werden in der wissenschaftlichen Literatur in diese Produkte gesetzt, sowohl f{\"u}r die akute Unterst{\"u}tzung der Einsatzkr{\"a}fte im Katastrophenfall, als auch in der Modellierung mittels hydrodynamischer Modelle oder zur Schadensabsch{\"a}tzung. Daher wurde ein Fokus in dieser Arbeit auf die Untersuchung dieser Flutmasken gelegt. Aus der Beobachtung, dass die Qualit{\"a}t dieser Produkte in bewaldeten und urbanen Gebieten unzureichend ist, wurde ein Verfahren zur nachtr{\"a}glichenVerbesserung mittels maschinellem Lernen entwickelt. Das Verfahren basiert auf einem Klassifikationsalgorithmus der nur Trainingsdaten von einer vorherzusagenden Klasse ben{\"o}tigt, im konkreten Fall also Daten von {\"U}berflutungsfl{\"a}chen, nicht jedoch von der negativen Klasse (trockene Gebiete). Die Anwendung f{\"u}r Hurricane Harvey in Houston zeigt großes Potenzial der Methode, abh{\"a}ngig von der Qualit{\"a}t der urspr{\"u}nglichen Flutmaske. Anschließend wird anhand einer prozessbasierten Modellkette untersucht, welchen Einfluss implementierte physikalische Prozessdetails auf das vorhergesagte statistische Risiko haben. Es wird anschaulich gezeigt, was eine Risikostudie basierend auf etablierten Modellen leisten kann. Solche Modellketten sind allerdings bereits f{\"u}r Flusshochwasser sehr komplex, und f{\"u}r zusammengesetzte oder kaskadierende Ereignisse mit Starkregen, Sturzfluten, und weiteren Prozessen, kaum vorhanden. Im vierten Kapitel dieser Arbeit wird daher getestet, ob maschinelles Lernen auf Basis von vollst{\"a}ndigen Schadensdaten einen direkteren Weg zur Schadensmodellierung erm{\"o}glicht, der die explizite Konzeption einer solchen Modellkette umgeht. Dazu wird ein staatlich erhobener Datensatz der gesch{\"a}digten Geb{\"a}ude w{\"a}hrend des schweren El Ni{\~n}o Ereignisses 2017 in Peru verwendet. In diesem Kontext werden auch die M{\"o}glichkeiten des Data-Mining zur Extraktion von Prozessverst{\"a}ndnis ausgelotet. Es kann gezeigt werden, dass diverse frei verf{\"u}gbare Geodaten n{\"u}tzliche Informationen f{\"u}r die Gefahren- und Schadensmodellierung von komplexen Flutereignissen liefern, z.B. satellitenbasierte Regenmessungen, topographische und hydrographische Information, kartierte Siedlungsfl{\"a}chen, sowie Indikatoren aus Spektraldaten. Zudem zeigen sich Erkenntnisse zu den Sch{\"a}digungsprozessen, die im Wesentlichen mit den vorherigen Erwartungen in Einklang stehen. Die maximale Regenintensit{\"a}t wirkt beispielsweise in St{\"a}dten und steilen Schluchten st{\"a}rker sch{\"a}digend, w{\"a}hrend die Niederschlagssumme in tiefliegenden Flussgebieten und bewaldeten Regionen als aussagekr{\"a}ftiger befunden wurde. L{\"a}ndliche Gebiete in Peru weisen in der pr{\"a}sentierten Studie eine h{\"o}here Vulnerabilit{\"a}t als die Stadtgebiete auf. Jedoch werden auch die grunds{\"a}tzlichen Grenzen der Methodik und die Abh{\"a}ngigkeit von spezifischen Datens{\"a}tzen and Algorithmen offenkundig. In der {\"u}bergreifenden Diskussion werden schließlich die verschiedenen Methoden - prozessbasierte Modellierung, pr{\"a}diktives maschinelles Lernen, und Data-Mining - mit Blick auf die Gesamtfragestellungen evaluiert. Im Bereich der Gefahrenbeobachtung scheint eine Fokussierung auf neue Algorithmen sinnvoll. Im Bereich der Gefahrenmodellierung, insbesondere f{\"u}r Flusshochwasser, wird eher die Verbesserung von physikalischen Modellen, oder die Integration von prozessbasierten und statistischen Verfahren angeraten. In der Schadensmodellierung fehlen nach wie vor die großen repr{\"a}sentativen Datens{\"a}tze, die f{\"u}r eine breite Anwendung von maschinellem Lernen Voraussetzung ist. Daher ist die Verbesserung der Datengrundlage im Bereich der Sch{\"a}den derzeit als wichtiger einzustufen als die Auswahl der Algorithmen.}, language = {en} } @article{BrandesSicksBerger2021, author = {Brandes, Stefanie and Sicks, Florian and Berger, Anne}, title = {Behaviour classification on giraffes (Giraffa camelopardalis) using machine learning algorithms on triaxial acceleration data of two commonly used GPS devices and its possible application for their management and conservation}, series = {Sensors}, volume = {21}, journal = {Sensors}, number = {6}, publisher = {MDPI}, address = {Basel}, issn = {1424-8220}, doi = {10.3390/s21062229}, pages = {22}, year = {2021}, abstract = {Averting today's loss of biodiversity and ecosystem services can be achieved through conservation efforts, especially of keystone species. Giraffes (Giraffa camelopardalis) play an important role in sustaining Africa's ecosystems, but are 'vulnerable' according to the IUCN Red List since 2016. Monitoring an animal's behavior in the wild helps to develop and assess their conservation management. One mechanism for remote tracking of wildlife behavior is to attach accelerometers to animals to record their body movement. We tested two different commercially available high-resolution accelerometers, e-obs and Africa Wildlife Tracking (AWT), attached to the top of the heads of three captive giraffes and analyzed the accuracy of automatic behavior classifications, focused on the Random Forests algorithm. For both accelerometers, behaviors of lower variety in head and neck movements could be better predicted (i.e., feeding above eye level, mean prediction accuracy e-obs/AWT: 97.6\%/99.7\%; drinking: 96.7\%/97.0\%) than those with a higher variety of body postures (such as standing: 90.7-91.0\%/75.2-76.7\%; rumination: 89.6-91.6\%/53.5-86.5\%). Nonetheless both devices come with limitations and especially the AWT needs technological adaptations before applying it on animals in the wild. Nevertheless, looking at the prediction results, both are promising accelerometers for behavioral classification of giraffes. Therefore, these devices when applied to free-ranging animals, in combination with GPS tracking, can contribute greatly to the conservation of giraffes.}, language = {en} } @article{HampfNendelStreyetal.2021, author = {Hampf, Anna and Nendel, Claas and Strey, Simone and Strey, Robert}, title = {Biotic yield losses in the Southern Amazon, Brazil}, series = {Frontiers in plant science : FPLS}, volume = {12}, journal = {Frontiers in plant science : FPLS}, publisher = {Frontiers Media}, address = {Lausanne}, issn = {1664-462X}, doi = {10.3389/fpls.2021.621168}, pages = {16}, year = {2021}, abstract = {Pathogens and animal pests (P\&A) are a major threat to global food security as they directly affect the quantity and quality of food. The Southern Amazon, Brazil's largest domestic region for soybean, maize and cotton production, is particularly vulnerable to the outbreak of P\&A due to its (sub)tropical climate and intensive farming systems. However, little is known about the spatial distribution of P\&A and the related yield losses. Machine learning approaches for the automated recognition of plant diseases can help to overcome this research gap. The main objectives of this study are to (1) evaluate the performance of Convolutional Neural Networks (ConvNets) in classifying P\&A, (2) map the spatial distribution of P\&A in the Southern Amazon, and (3) quantify perceived yield and economic losses for the main soybean and maize P\&A. The objectives were addressed by making use of data collected with the smartphone application Plantix. The core of the app's functioning is the automated recognition of plant diseases via ConvNets. Data on expected yield losses were gathered through a short survey included in an "expert" version of the application, which was distributed among agronomists. Between 2016 and 2020, Plantix users collected approximately 78,000 georeferenced P\&A images in the Southern Amazon. The study results indicate a high performance of the trained ConvNets in classifying 420 different crop-disease combinations. Spatial distribution maps and expert-based yield loss estimates indicate that maize rust, bacterial stalk rot and the fall armyworm are among the most severe maize P\&A, whereas soybean is mainly affected by P\&A like anthracnose, downy mildew, frogeye leaf spot, stink bugs and brown spot. Perceived soybean and maize yield losses amount to 12 and 16\%, respectively, resulting in annual yield losses of approximately 3.75 million tonnes for each crop and economic losses of US\$2 billion for both crops together. The high level of accuracy of the trained ConvNets, when paired with widespread use from following a citizen-science approach, results in a data source that will shed new light on yield loss estimates, e.g., for the analysis of yield gaps and the development of measures to minimise them.}, language = {en} } @article{FrommholdHeimBarabanovetal.2019, author = {Frommhold, Martin and Heim, Arend and Barabanov, Mikhail and Maier, Franziska and M{\"u}hle, Ralf-Udo and Smirenski, Sergei M. and Heim, Wieland}, title = {Breeding habitat and nest-site selection by an obligatory "nest-cleptoparasite", the Amur Falcon Falco amurensis}, series = {Ecology and evolution}, volume = {9}, journal = {Ecology and evolution}, number = {24}, publisher = {Wiley}, address = {Hoboken}, issn = {2045-7758}, doi = {10.1002/ece3.5878}, pages = {14430 -- 14441}, year = {2019}, abstract = {The selection of a nest site is crucial for successful reproduction of birds. Animals which re-use or occupy nest sites constructed by other species often have limited choice. Little is known about the criteria of nest-stealing species to choose suitable nesting sites and habitats. Here, we analyze breeding-site selection of an obligatory "nest-cleptoparasite", the Amur Falcon Falco amurensis. We collected data on nest sites at Muraviovka Park in the Russian Far East, where the species breeds exclusively in nests of the Eurasian Magpie Pica pica. We sampled 117 Eurasian Magpie nests, 38 of which were occupied by Amur Falcons. Nest-specific variables were assessed, and a recently developed habitat classification map was used to derive landscape metrics. We found that Amur Falcons chose a wide range of nesting sites, but significantly preferred nests with a domed roof. Breeding pairs of Eurasian Hobby Falco subbuteo and Eurasian Magpie were often found to breed near the nest in about the same distance as neighboring Amur Falcon pairs. Additionally, the occurrence of the species was positively associated with bare soil cover, forest cover, and shrub patches within their home range and negatively with the distance to wetlands. Areas of wetlands and fallow land might be used for foraging since Amur Falcons mostly depend on an insect diet. Additionally, we found that rarely burned habitats were preferred. Overall, the effect of landscape variables on the choice of actual nest sites appeared to be rather small. We used different classification methods to predict the probability of occurrence, of which the Random forest method showed the highest accuracy. The areas determined as suitable habitat showed a high concordance with the actual nest locations. We conclude that Amur Falcons prefer to occupy newly built (domed) nests to ensure high nest quality, as well as nests surrounded by available feeding habitats.}, language = {en} } @article{SchmidtHesseAttingeretal.2020, author = {Schmidt, Lennart and Hesse, Falk and Attinger, Sabine and Kumar, Rohini}, title = {Challenges in applying machine learning models for hydrological inference}, series = {Water resources research}, volume = {56}, journal = {Water resources research}, number = {5}, publisher = {American Geophysical Union}, address = {Washington}, issn = {0043-1397}, doi = {10.1029/2019WR025924}, pages = {10}, year = {2020}, abstract = {Machine learning (ML) algorithms are being increasingly used in Earth and Environmental modeling studies owing to the ever-increasing availability of diverse data sets and computational resources as well as advancement in ML algorithms. Despite advances in their predictive accuracy, the usefulness of ML algorithms for inference remains elusive. In this study, we employ two popular ML algorithms, artificial neural networks and random forest, to analyze a large data set of flood events across Germany with the goals to analyze their predictive accuracy and their usability to provide insights to hydrologic system functioning. The results of the ML algorithms are contrasted against a parametric approach based on multiple linear regression. For analysis, we employ a model-agnostic framework named Permuted Feature Importance to derive the influence of models' predictors. This allows us to compare the results of different algorithms for the first time in the context of hydrology. Our main findings are that (1) the ML models achieve higher prediction accuracy than linear regression, (2) the results reflect basic hydrological principles, but (3) further inference is hindered by the heterogeneity of results across algorithms. Thus, we conclude that the problem of equifinality as known from classical hydrological modeling also exists for ML and severely hampers its potential for inference. To account for the observed problems, we propose that when employing ML for inference, this should be made by using multiple algorithms and multiple methods, of which the latter should be embedded in a cross-validation routine.}, language = {en} } @misc{SchmidtHesseAttingeretal.2020, author = {Schmidt, Lennart and Heße, Falk and Attinger, Sabine and Kumar, Rohini}, title = {Challenges in applying machine learning models for hydrological inference: a case study for flooding events across Germany}, series = {Postprints der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe}, journal = {Postprints der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe}, number = {5}, issn = {1866-8372}, doi = {10.25932/publishup-52384}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-523843}, pages = {12}, year = {2020}, abstract = {Machine learning (ML) algorithms are being increasingly used in Earth and Environmental modeling studies owing to the ever-increasing availability of diverse data sets and computational resources as well as advancement in ML algorithms. Despite advances in their predictive accuracy, the usefulness of ML algorithms for inference remains elusive. In this study, we employ two popular ML algorithms, artificial neural networks and random forest, to analyze a large data set of flood events across Germany with the goals to analyze their predictive accuracy and their usability to provide insights to hydrologic system functioning. The results of the ML algorithms are contrasted against a parametric approach based on multiple linear regression. For analysis, we employ a model-agnostic framework named Permuted Feature Importance to derive the influence of models' predictors. This allows us to compare the results of different algorithms for the first time in the context of hydrology. Our main findings are that (1) the ML models achieve higher prediction accuracy than linear regression, (2) the results reflect basic hydrological principles, but (3) further inference is hindered by the heterogeneity of results across algorithms. Thus, we conclude that the problem of equifinality as known from classical hydrological modeling also exists for ML and severely hampers its potential for inference. To account for the observed problems, we propose that when employing ML for inference, this should be made by using multiple algorithms and multiple methods, of which the latter should be embedded in a cross-validation routine.}, language = {en} }