% Publication records: journal articles and Universitaet Potsdam postprints.
% Conventions used in this file:
%   - one field per line, values delimited by braces
%   - non-ASCII characters are written as LaTeX escapes
%   - page ranges use a double hyphen without surrounding spaces
%   - citation keys are unique; a letter suffix disambiguates same-author/same-year works
%   - article entries carry the journal name in the journal field only
% NOTE(review): single-number pages values (e.g. 12, 15, 18) look like page
% counts rather than page ranges; confirm against the publisher records.

% Postprint, number 850 of the series named in the series field.
@misc{HischeLarhlimiSchwarzetal.2012,
  author   = {Hische, Manuela and Larhlimi, Abdelhalim and Schwarz, Franziska and Fischer-Rosinsk{\'y}, Antje and Bobbert, Thomas and Assmann, Anke and Catchpole, Gareth S. and Pfeiffer, Andreas F. H. and Willmitzer, Lothar and Selbig, Joachim and Spranger, Joachim},
  title    = {A distinct metabolic signature predicts development of fasting plasma glucose},
  series   = {Postprints der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe},
  journal  = {Postprints der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe},
  number   = {850},
  issn     = {1866-8372},
  doi      = {10.25932/publishup-42740},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-427400},
  pages    = {12},
  year     = {2012},
  abstract = {Background High blood glucose and diabetes are amongst the conditions causing the greatest losses in years of healthy life worldwide. Therefore, numerous studies aim to identify reliable risk markers for development of impaired glucose metabolism and type 2 diabetes. However, the molecular basis of impaired glucose metabolism is so far insufficiently understood. The development of so called 'omics' approaches in the recent years promises to identify molecular markers and to further understand the molecular basis of impaired glucose metabolism and type 2 diabetes. Although univariate statistical approaches are often applied, we demonstrate here that the application of multivariate statistical approaches is highly recommended to fully capture the complexity of data gained using high-throughput methods. Methods We took blood plasma samples from 172 subjects who participated in the prospective Metabolic Syndrome Berlin Potsdam follow-up study (MESY-BEPO Follow-up). We analysed these samples using Gas Chromatography coupled with Mass Spectrometry (GC-MS), and measured 286 metabolites. Furthermore, fasting glucose levels were measured using standard methods at baseline, and after an average of six years. We did correlation analysis and built linear regression models as well as Random Forest regression models to identify metabolites that predict the development of fasting glucose in our cohort. Results We found a metabolic pattern consisting of nine metabolites that predicted fasting glucose development with an accuracy of 0.47 in tenfold cross-validation using Random Forest regression. We also showed that adding established risk markers did not improve the model accuracy. However, external validation is eventually desirable. Although not all metabolites belonging to the final pattern are identified yet, the pattern directs attention to amino acid metabolism, energy metabolism and redox homeostasis. Conclusions We demonstrate that metabolites identified using a high-throughput method (GC-MS) perform well in predicting the development of fasting plasma glucose over several years. Notably, not single, but a complex pattern of metabolites propels the prediction and therefore reflects the complexity of the underlying molecular mechanisms. This result could only be captured by application of multivariate statistical approaches. Therefore, we highly recommend the usage of statistical methods that seize the complexity of the information given by high-throughput methods.},
  language = {en}
}

@article{KoumarelasKroschkMosleyetal.2018,
  author    = {Koumarelas, Ioannis and Kroschk, Axel and Mosley, Clifford and Naumann, Felix},
  title     = {Experience: Enhancing address matching with geocoding and similarity measure selection},
  volume    = {10},
  journal   = {Journal of Data and Information Quality},
  number    = {2},
  publisher = {Association for Computing Machinery},
  address   = {New York},
  issn      = {1936-1955},
  doi       = {10.1145/3232852},
  pages     = {1--16},
  year      = {2018},
  abstract  = {Given a query record, record matching is the problem of finding database records that represent the same real-world object. In the easiest scenario, a database record is completely identical to the query. However, in most cases, problems do arise, for instance, as a result of data errors or data integrated from multiple sources or received from restrictive form fields. These problems are usually difficult, because they require a variety of actions, including field segmentation, decoding of values, and similarity comparisons, each requiring some domain knowledge. In this article, we study the problem of matching records that contain address information, including attributes such as Street-address and City. To facilitate this matching process, we propose a domain-specific procedure to, first, enrich each record with a more complete representation of the address information through geocoding and reverse-geocoding and, second, to select the best similarity measure per each address attribute that will finally help the classifier to achieve the best f-measure. We report on our experience in selecting geocoding services and discovering similarity measures for a concrete but common industry use-case.},
  language  = {en}
}

% Journal version; a postprint with the same authors and title follows below.
@article{HellwigTattiSartorietal.2018,
  author    = {Hellwig, Niels and Tatti, Dylan and Sartori, Giacomo and Anschlag, Kerstin and Graefe, Ulfert and Egli, Markus and Gobat, Jean-Michel and Broll, Gabriele},
  title     = {Modeling spatial patterns of humus forms in montane and subalpine forests},
  volume    = {11},
  journal   = {Sustainability},
  number    = {1},
  publisher = {MDPI},
  address   = {Basel},
  issn      = {2071-1050},
  doi       = {10.3390/su11010048},
  pages     = {15},
  year      = {2018},
  abstract  = {Humus forms are a distinctive morphological indicator of soil organic matter decomposition. The spatial distribution of humus forms depends on environmental factors such as topography, climate and vegetation. In montane and subalpine forests, environmental influences show a high spatial heterogeneity, which is reflected by a high spatial variability of humus forms. This study aims at examining spatial patterns of humus forms and their dependence on the spatial scale in a high mountain forest environment (Val di Sole/Val di Rabbi, Trentino, Italian Alps). On the basis of the distributions of environmental covariates across the study area, we described humus forms at the local scale (six sampling sites), slope scale (60 sampling sites) and landscape scale (30 additional sampling sites). The local variability of humus forms was analyzed with regard to the ground cover type. At the slope and landscape scale, spatial patterns of humus forms were modeled applying random forests and ordinary kriging of the model residuals. The results indicate that the occurrence of the humus form classes Mull, Mullmoder, Moder, Amphi and Eroded Moder generally depends on the topographical position. Local-scale patterns are mostly related to micro-topography (local accumulation and erosion sites) and ground cover, whereas slope-scale patterns are mainly connected with slope exposure and elevation. Patterns at the landscape scale show a rather irregular distribution, as spatial models at this scale do not account for local to slope-scale variations of humus forms. Moreover, models at the slope scale perform distinctly better than at the landscape scale. In conclusion, the results of this study highlight that landscape-scale predictions of humus forms should be accompanied by local- and slope-scale studies in order to enhance the general understanding of humus form patterns.},
  language  = {en}
}

% Postprint, number 1128 of the series named in the series field.
@misc{HellwigTattiSartorietal.2019,
  author   = {Hellwig, Niels and Tatti, Dylan and Sartori, Giacomo and Anschlag, Kerstin and Graefe, Ulfert and Egli, Markus and Gobat, Jean-Michel and Broll, Gabriele},
  title    = {Modeling spatial patterns of humus forms in montane and subalpine forests},
  series   = {Postprints der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe},
  journal  = {Postprints der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe},
  number   = {1128},
  issn     = {1866-8372},
  doi      = {10.25932/publishup-47226},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-472265},
  pages    = {17},
  year     = {2019},
  abstract = {Humus forms are a distinctive morphological indicator of soil organic matter decomposition. The spatial distribution of humus forms depends on environmental factors such as topography, climate and vegetation. In montane and subalpine forests, environmental influences show a high spatial heterogeneity, which is reflected by a high spatial variability of humus forms. This study aims at examining spatial patterns of humus forms and their dependence on the spatial scale in a high mountain forest environment (Val di Sole/Val di Rabbi, Trentino, Italian Alps). On the basis of the distributions of environmental covariates across the study area, we described humus forms at the local scale (six sampling sites), slope scale (60 sampling sites) and landscape scale (30 additional sampling sites). The local variability of humus forms was analyzed with regard to the ground cover type. At the slope and landscape scale, spatial patterns of humus forms were modeled applying random forests and ordinary kriging of the model residuals. The results indicate that the occurrence of the humus form classes Mull, Mullmoder, Moder, Amphi and Eroded Moder generally depends on the topographical position. Local-scale patterns are mostly related to micro-topography (local accumulation and erosion sites) and ground cover, whereas slope-scale patterns are mainly connected with slope exposure and elevation. Patterns at the landscape scale show a rather irregular distribution, as spatial models at this scale do not account for local to slope-scale variations of humus forms. Moreover, models at the slope scale perform distinctly better than at the landscape scale. In conclusion, the results of this study highlight that landscape-scale predictions of humus forms should be accompanied by local- and slope-scale studies in order to enhance the general understanding of humus form patterns.},
  language = {en}
}

% Title: TeX quotes instead of straight double quotes; proper nouns are
% brace-protected so sentence-casing styles keep their capitals.
@article{FrommholdHeimBarabanovetal.2019,
  author    = {Frommhold, Martin and Heim, Arend and Barabanov, Mikhail and Maier, Franziska and M{\"u}hle, Ralf-Udo and Smirenski, Sergei M. and Heim, Wieland},
  title     = {Breeding habitat and nest-site selection by an obligatory ``nest-cleptoparasite'', the {Amur Falcon} {Falco amurensis}},
  volume    = {9},
  journal   = {Ecology and evolution},
  number    = {24},
  publisher = {Wiley},
  address   = {Hoboken},
  issn      = {2045-7758},
  doi       = {10.1002/ece3.5878},
  pages     = {14430--14441},
  year      = {2019},
  abstract  = {The selection of a nest site is crucial for successful reproduction of birds. Animals which re-use or occupy nest sites constructed by other species often have limited choice. Little is known about the criteria of nest-stealing species to choose suitable nesting sites and habitats. Here, we analyze breeding-site selection of an obligatory "nest-cleptoparasite", the Amur Falcon Falco amurensis. We collected data on nest sites at Muraviovka Park in the Russian Far East, where the species breeds exclusively in nests of the Eurasian Magpie Pica pica. We sampled 117 Eurasian Magpie nests, 38 of which were occupied by Amur Falcons. Nest-specific variables were assessed, and a recently developed habitat classification map was used to derive landscape metrics. We found that Amur Falcons chose a wide range of nesting sites, but significantly preferred nests with a domed roof. Breeding pairs of Eurasian Hobby Falco subbuteo and Eurasian Magpie were often found to breed near the nest in about the same distance as neighboring Amur Falcon pairs. Additionally, the occurrence of the species was positively associated with bare soil cover, forest cover, and shrub patches within their home range and negatively with the distance to wetlands. Areas of wetlands and fallow land might be used for foraging since Amur Falcons mostly depend on an insect diet. Additionally, we found that rarely burned habitats were preferred. Overall, the effect of landscape variables on the choice of actual nest sites appeared to be rather small. We used different classification methods to predict the probability of occurrence, of which the Random forest method showed the highest accuracy. The areas determined as suitable habitat showed a high concordance with the actual nest locations. We conclude that Amur Falcons prefer to occupy newly built (domed) nests to ensure high nest quality, as well as nests surrounded by available feeding habitats.},
  language  = {en}
}

@article{LehmannZhengRyoetal.2020,
  author    = {Lehmann, Anika and Zheng, Weishuang and Ryo, Masahiro and Soutschek, Katharina and Roy, Julien and Rongstock, Rebecca and Maa{\ss}, Stefanie and Rillig, Matthias C.},
  title     = {Fungal traits important for soil aggregation},
  volume    = {10},
  journal   = {Frontiers in microbiology},
  publisher = {Frontiers Media},
  address   = {Lausanne},
  issn      = {1664-302X},
  doi       = {10.3389/fmicb.2019.02904},
  pages     = {13},
  year      = {2020},
  abstract  = {Soil structure, the complex arrangement of soil into aggregates and pore spaces, is a key feature of soils and soil biota. Among them, filamentous saprobic fungi have well-documented effects on soil aggregation. However, it is unclear what properties, or traits, determine the overall positive effect of fungi on soil aggregation. To achieve progress, it would be helpful to systematically investigate a broad suite of fungal species for their trait expression and the relation of these traits to soil aggregation. Here, we apply a trait-based approach to a set of 15 traits measured under standardized conditions on 31 fungal strains including Ascomycota, Basidiomycota, and Mucoromycota, all isolated from the same soil. We find large differences among these fungi in their ability to aggregate soil, including neutral to positive effects, and we document large differences in trait expression among strains. We identify biomass density, i.e., the density with which a mycelium grows (positive effects), leucine aminopeptidase activity (negative effects) and phylogeny as important factors explaining differences in soil aggregate formation (SAF) among fungal strains; importantly, growth rate was not among the important traits. Our results point to a typical suite of traits characterizing fungi that are good soil aggregators, and our findings illustrate the power of employing a trait-based approach to unravel biological mechanisms underpinning soil aggregation. Such an approach could now be extended also to other soil biota groups. In an applied context of restoration and agriculture, such trait information can inform management, for example to prioritize practices that favor the expression of more desirable fungal traits.},
  language  = {en}
}

% First of two 2020 works whose generated keys collide; this one keeps the
% unsuffixed key.
@article{HempelAdolphsLandwehretal.2020,
  author    = {Hempel, Sabrina and Adolphs, Julian and Landwehr, Niels and Janke, David and Amon, Thomas},
  title     = {How the selection of training data and modeling approach affects the estimation of ammonia emissions from a naturally ventilated dairy barn---classical statistics versus machine learning},
  volume    = {12},
  journal   = {Sustainability},
  number    = {3},
  publisher = {MDPI},
  address   = {Basel},
  issn      = {2071-1050},
  doi       = {10.3390/su12031030},
  pages     = {18},
  year      = {2020},
  abstract  = {Environmental protection efforts can only be effective in the long term with a reliable quantification of pollutant gas emissions as a first step to mitigation. Measurement and analysis strategies must permit the accurate extrapolation of emission values. We systematically analyzed the added value of applying modern machine learning methods in the process of monitoring emissions from naturally ventilated livestock buildings to the atmosphere. We considered almost 40 weeks of hourly emission values from a naturally ventilated dairy cattle barn in Northern Germany. We compared model predictions using 27 different scenarios of temporal sampling, multiple measures of model accuracy, and eight different regression approaches. The error of the predicted emission values with the tested measurement protocols was, on average, well below 20\%. The sensitivity of the prediction to the selected training dataset was worse for the ordinary multilinear regression. Gradient boosting and random forests provided the most accurate and robust emission value predictions, accompanied by the second-smallest model errors. Most of the highly ranked scenarios involved six measurement periods, while the scenario with the best overall performance was: One measurement period in summer and three in the transition periods, each lasting for 14 days.},
  language  = {en}
}

% Second 2020 work with the same generated key stem; the "a" suffix keeps the
% key unique so this entry is no longer dropped as a repeated entry.
@article{HempelAdolphsLandwehretal.2020a,
  author    = {Hempel, Sabrina and Adolphs, Julian and Landwehr, Niels and Willink, Dilya and Janke, David and Amon, Thomas},
  title     = {Supervised machine learning to assess methane emissions of a dairy building with natural ventilation},
  volume    = {10},
  journal   = {Applied Sciences},
  number    = {19},
  publisher = {MDPI},
  address   = {Basel},
  issn      = {2076-3417},
  doi       = {10.3390/app10196938},
  pages     = {21},
  year      = {2020},
  abstract  = {A reliable quantification of greenhouse gas emissions is a basis for the development of adequate mitigation measures. Protocols for emission measurements and data analysis approaches to extrapolate to accurate annual emission values are a substantial prerequisite in this context. We systematically analyzed the benefit of supervised machine learning methods to project methane emissions from a naturally ventilated cattle building with a concrete solid floor and manure scraper located in Northern Germany. We took into account approximately 40 weeks of hourly emission measurements and compared model predictions using eight regression approaches, 27 different sampling scenarios and four measures of model accuracy. Data normalization was applied based on median and quartile range. A correlation analysis was performed to evaluate the influence of individual features. This indicated only a very weak linear relation between the methane emission and features that are typically used to predict methane emission values of naturally ventilated barns. It further highlighted the added value of including day-time and squared ambient temperature as features. The error of the predicted emission values was in general below 10\%. The results from Gaussian processes, ordinary multilinear regression and neural networks were least robust. More robust results were obtained with multilinear regression with regularization, support vector machines and particularly the ensemble methods gradient boosting and random forest. The latter had the added value to be rather insensitive against the normalization procedure. In the case of multilinear regression, also the removal of not significantly linearly related variables (i.e., keeping only the day-time component) led to robust modeling results. We concluded that measurement protocols with 7 days and six measurement periods can be considered sufficient to model methane emissions from the dairy barn with solid floor with manure scraper, particularly when periods are distributed over the year with a preference for transition periods. Features should be normalized according to median and quartile range and must be carefully selected depending on the modeling approach.},
  language  = {en}
}

@article{CeulemansGuillGaedke2021,
  author    = {Ceulemans, Ruben and Guill, Christian and Gaedke, Ursula},
  title     = {Top predators govern multitrophic diversity effects in tritrophic food webs},
  volume    = {102},
  journal   = {Ecology : a publication of the Ecological Society of America},
  number    = {7},
  publisher = {Wiley},
  address   = {Hoboken},
  issn      = {0012-9658},
  doi       = {10.1002/ecy.3379},
  pages     = {16},
  year      = {2021},
  abstract  = {It is well known that functional diversity strongly affects ecosystem functioning. However, even in rather simple model communities consisting of only two or, at best, three trophic levels, the relationship between multitrophic functional diversity and ecosystem functioning appears difficult to generalize, because of its high contextuality. In this study, we considered several differently structured tritrophic food webs, in which the amount of functional diversity was varied independently on each trophic level. To achieve generalizable results, largely independent of parametrization, we examined the outcomes of 128,000 parameter combinations sampled from ecologically plausible intervals, with each tested for 200 randomly sampled initial conditions. Analysis of our data was done by training a random forest model. This method enables the identification of complex patterns in the data through partial dependence graphs, and the comparison of the relative influence of model parameters, including the degree of diversity, on food-web properties. We found that bottom-up and top-down effects cascade simultaneously throughout the food web, intimately linking the effects of functional diversity of any trophic level to the amount of diversity of other trophic levels, which may explain the difficulty in unifying results from previous studies. Strikingly, only with high diversity throughout the whole food web, different interactions synergize to ensure efficient exploitation of the available nutrients and efficient biomass transfer to higher trophic levels, ultimately leading to a high biomass and production on the top level. The temporal variation of biomass showed a more complex pattern with increasing multitrophic diversity: while the system initially became less variable, eventually the temporal variation rose again because of the increasingly complex dynamical patterns. Importantly, top predator diversity and food-web parameters affecting the top trophic level were of highest importance to determine the biomass and temporal variability of any trophic level. Overall, our study reveals that the mechanisms by which diversity influences ecosystem functioning are affected by every part of the food web, hampering the extrapolation of insights from simple monotrophic or bitrophic systems to complex natural food webs.},
  language  = {en}
}

@article{RanaOeztuerkMalik2021,
  author    = {Rana, Kamal and {\"O}zt{\"u}rk, Ugur and Malik, Nishant},
  title     = {Landslide geometry reveals its trigger},
  volume    = {48},
  journal   = {Geophysical research letters : GRL / American Geophysical Union},
  number    = {4},
  publisher = {American Geophysical Union},
  address   = {Washington},
  issn      = {0094-8276},
  doi       = {10.1029/2020GL090848},
  pages     = {8},
  year      = {2021},
  abstract  = {Electronic databases of landslides seldom include the triggering mechanisms, rendering these inventories unusable for landslide hazard modeling. We present a method for classifying the triggering mechanisms of landslides in existing inventories, thus, allowing these inventories to aid in landslide hazard modeling corresponding to the correct event chain. Our method uses various geometric characteristics of landslides as the feature space for the machine-learning classifier random forest, resulting in accurate and robust classifications of landslide triggers. We applied the method to six landslide inventories spread over the Japanese archipelago in several different tests and training configurations to demonstrate the effectiveness of our approach. We achieved mean accuracy ranging from 67\% to 92\%. We also provide an illustrative example of a real-world usage scenario for our method using an additional inventory with unknown ground truth. Furthermore, our feature importance analysis indicates that landslides having identical trigger mechanisms exhibit similar geometric properties.},
  language  = {en}
}