@comment{
  Journal articles, newest first. Every entry below lists Landwehr, Niels
  among its authors.

  Conventions used in this block:
    - one field per line; citation keys are kept exactly as exported
    - journal holds the plain journal name; no separate series field is
      kept because it only repeated the journal name
    - words that must keep their capitalisation in titles are braced
    - pagetotal holds a page count; pages holds a real page range only

  NOTE(review): the 2020-2022 entries were exported with a bare number in
  the pages field. These are presumably page counts of articles that are
  identified by article number rather than by page range, so they were
  moved to pagetotal. Confirm against the publisher record for each DOI
  and add the article number or final page range where one exists.
}

@article{AbdelwahabLandwehr2022,
  author    = {Abdelwahab, Ahmed and Landwehr, Niels},
  title     = {Deep Distributional Sequence Embeddings Based on a {Wasserstein} Loss},
  journal   = {Neural Processing Letters},
  pagetotal = {21},
  year      = {2022},
  publisher = {Springer},
  address   = {Dordrecht},
  issn      = {1370-4621},
  doi       = {10.1007/s11063-022-10784-y},
  language  = {english},
  abstract  = {Deep metric learning employs deep neural networks to embed instances into a metric space such that distances between instances of the same class are small and distances between instances from different classes are large. In most existing deep metric learning techniques, the embedding of an instance is given by a feature vector produced by a deep neural network and Euclidean distance or cosine similarity defines distances between these vectors. This paper studies deep distributional embeddings of sequences, where the embedding of a sequence is given by the distribution of learned deep features across the sequence. The motivation for this is to better capture statistical information about the distribution of patterns within the sequence in the embedding. When embeddings are distributions rather than vectors, measuring distances between embeddings involves comparing their respective distributions. The paper therefore proposes a distance metric based on Wasserstein distances between the distributions and a corresponding loss function for metric learning, which leads to a novel end-to-end trainable embedding model. We empirically observe that distributional embeddings outperform standard vector embeddings and that training with the proposed Wasserstein metric outperforms training with other distance functions.},
}

@article{SchirrmannLandwehrGiebeletal.2021,
  author    = {Schirrmann, Michael and Landwehr, Niels and Giebel, Antje and Garz, Andreas and Dammer, Karl-Heinz},
  title     = {Early detection of stripe rust in winter wheat using deep residual neural networks},
  journal   = {Frontiers in Plant Science},
  volume    = {12},
  pagetotal = {14},
  year      = {2021},
  publisher = {Frontiers Media},
  address   = {Lausanne},
  issn      = {1664-462X},
  doi       = {10.3389/fpls.2021.469689},
  language  = {english},
  abstract  = {Stripe rust (Pst) is a major disease of wheat crops leading untreated to severe yield losses. The use of fungicides is often essential to control Pst when sudden outbreaks are imminent. Sensors capable of detecting Pst in wheat crops could optimize the use of fungicides and improve disease monitoring in high-throughput field phenotyping. Now, deep learning provides new tools for image recognition and may pave the way for new camera based sensors that can identify symptoms in early stages of a disease outbreak within the field. The aim of this study was to teach an image classifier to detect Pst symptoms in winter wheat canopies based on a deep residual neural network (ResNet). For this purpose, a large annotation database was created from images taken by a standard RGB camera that was mounted on a platform at a height of 2 m. Images were acquired while the platform was moved over a randomized field experiment with Pst-inoculated and Pst-free plots of winter wheat. The image classifier was trained with 224 x 224 px patches tiled from the original, unprocessed camera images. The image classifier was tested on different stages of the disease outbreak. At patch level the image classifier reached a total accuracy of 90\%. To test the image classifier on image level, the image classifier was evaluated with a sliding window using a large striding length of 224 px allowing for fast test performance. At image level, the image classifier reached a total accuracy of 77\%. Even in a stage with very low disease spreading (0.5\%) at the very beginning of the Pst outbreak, a detection accuracy of 57\% was obtained. Still in the initial phase of the Pst outbreak with 2 to 4\% of Pst disease spreading, detection accuracy with 76\% could be attained. With further optimizations, the image classifier could be implemented in embedded systems and deployed on drones, vehicles or scanning systems for fast mapping of Pst outbreaks.},
}

@article{GautamZhangLandwehretal.2021,
  author    = {Gautam, Khem Raj and Zhang, Guoqiang and Landwehr, Niels and Adolphs, Julian},
  title     = {Machine learning for improvement of thermal conditions inside a hybrid ventilated animal building},
  journal   = {Computers and Electronics in Agriculture},
  volume    = {187},
  pagetotal = {10},
  year      = {2021},
  publisher = {Elsevier Science},
  address   = {Amsterdam},
  issn      = {0168-1699},
  doi       = {10.1016/j.compag.2021.106259},
  language  = {english},
  abstract  = {In buildings with hybrid ventilation, natural ventilation opening positions (windows), mechanical ventilation rates, heating, and cooling are manipulated to maintain desired thermal conditions. The indoor temperature is regulated solely by ventilation (natural and mechanical) when the external conditions are favorable to save external heating and cooling energy. The ventilation parameters are determined by a rule-based control scheme, which is not optimal. This study proposes a methodology to enable real-time optimum control of ventilation parameters. We developed offline prediction models to estimate future thermal conditions from the data collected from building in operation. The developed offline model is then used to find the optimal controllable ventilation parameters in real-time to minimize the setpoint deviation in the building. With the proposed methodology, the experimental building's setpoint deviation improved for 87\% of time, on average, by 0.53\,$^{\circ}$C compared to the current deviations.},
}

@article{CamargoSchirrmannLandwehretal.2021,
  author    = {de Camargo, Tibor and Schirrmann, Michael and Landwehr, Niels and Dammer, Karl-Heinz and Pflanz, Michael},
  title     = {Optimized deep learning model as a basis for fast {UAV} mapping of weed species in winter wheat crops},
  journal   = {Remote Sensing},
  volume    = {13},
  number    = {9},
  pagetotal = {19},
  year      = {2021},
  publisher = {MDPI},
  address   = {Basel},
  issn      = {2072-4292},
  doi       = {10.3390/rs13091704},
  language  = {english},
  abstract  = {Weed maps should be available quickly, reliably, and with high detail to be useful for site-specific management in crop protection and to promote more sustainable agriculture by reducing pesticide use. Here, the optimization of a deep residual convolutional neural network (ResNet-18) for the classification of weed and crop plants in UAV imagery is proposed. The target was to reach sufficient performance on an embedded system by maintaining the same features of the ResNet-18 model as a basis for fast UAV mapping. This would enable online recognition and subsequent mapping of weeds during UAV flying operation. Optimization was achieved mainly by avoiding redundant computations that arise when a classification model is applied on overlapping tiles in a larger input image. The model was trained and tested with imagery obtained from a UAV flight campaign at low altitude over a winter wheat field, and classification was performed on species level with the weed species Matricaria chamomilla L., Papaver rhoeas L., Veronica hederifolia L., and Viola arvensis ssp. arvensis observed in that field. The ResNet-18 model with the optimized image-level prediction pipeline reached a performance of 2.2 frames per second with an NVIDIA Jetson AGX Xavier on the full resolution UAV image, which would amount to about 1.78 ha h$^{-1}$ area output for continuous field mapping. The overall accuracy for determining crop, soil, and weed species was 94\%. There were some limitations in the detection of species unknown to the model. When shifting from 16-bit to 32-bit model precision, no improvement in classification accuracy was observed, but a strong decline in speed performance, especially when a higher number of filters was used in the ResNet-18 model. Future work should be directed towards the integration of the mapping process on UAV platforms, guiding UAVs autonomously for mapping purpose, and ensuring the transferability of the models to other crop fields.},
}

@article{HempelAdolphsLandwehretal.2020,
  author    = {Hempel, Sabrina and Adolphs, Julian and Landwehr, Niels and Willink, Dilya and Janke, David and Amon, Thomas},
  title     = {Supervised machine learning to assess methane emissions of a dairy building with natural ventilation},
  journal   = {Applied Sciences},
  volume    = {10},
  number    = {19},
  pagetotal = {21},
  year      = {2020},
  publisher = {MDPI},
  address   = {Basel},
  issn      = {2076-3417},
  doi       = {10.3390/app10196938},
  language  = {english},
  abstract  = {A reliable quantification of greenhouse gas emissions is a basis for the development of adequate mitigation measures. Protocols for emission measurements and data analysis approaches to extrapolate to accurate annual emission values are a substantial prerequisite in this context. We systematically analyzed the benefit of supervised machine learning methods to project methane emissions from a naturally ventilated cattle building with a concrete solid floor and manure scraper located in Northern Germany. We took into account approximately 40 weeks of hourly emission measurements and compared model predictions using eight regression approaches, 27 different sampling scenarios and four measures of model accuracy. Data normalization was applied based on median and quartile range. A correlation analysis was performed to evaluate the influence of individual features. This indicated only a very weak linear relation between the methane emission and features that are typically used to predict methane emission values of naturally ventilated barns. It further highlighted the added value of including day-time and squared ambient temperature as features. The error of the predicted emission values was in general below 10\%. The results from Gaussian processes, ordinary multilinear regression and neural networks were least robust. More robust results were obtained with multilinear regression with regularization, support vector machines and particularly the ensemble methods gradient boosting and random forest. The latter had the added value to be rather insensitive against the normalization procedure. In the case of multilinear regression, also the removal of not significantly linearly related variables (i.e., keeping only the day-time component) led to robust modeling results. We concluded that measurement protocols with 7 days and six measurement periods can be considered sufficient to model methane emissions from the dairy barn with solid floor with manure scraper, particularly when periods are distributed over the year with a preference for transition periods. Features should be normalized according to median and quartile range and must be carefully selected depending on the modeling approach.},
}

@article{SawadeBickelvonOertzenetal.2013,
  author    = {Sawade, Christoph and Bickel, Steffen and von Oertzen, Timo and Scheffer, Tobias and Landwehr, Niels},
  title     = {Active evaluation of ranking functions based on graded relevance},
  journal   = {Machine Learning},
  volume    = {92},
  number    = {1},
  pages     = {41--64},
  year      = {2013},
  publisher = {Springer},
  address   = {Dordrecht},
  issn      = {0885-6125},
  doi       = {10.1007/s10994-013-5372-5},
  language  = {english},
  abstract  = {Evaluating the quality of ranking functions is a core task in web search and other information retrieval domains. Because query distributions and item relevance change over time, ranking models often cannot be evaluated accurately on held-out training data. Instead, considerable effort is spent on manually labeling the relevance of query results for test queries in order to track ranking performance. We address the problem of estimating ranking performance as accurately as possible on a fixed labeling budget. Estimates are based on a set of most informative test queries selected by an active sampling distribution. Query labeling costs depend on the number of result items as well as item-specific attributes such as document length. We derive cost-optimal sampling distributions for the commonly used performance measures Discounted Cumulative Gain and Expected Reciprocal Rank. Experiments on web search engine data illustrate significant reductions in labeling costs.},
}

@article{ThonLandwehrDeRaedt2011,
  author    = {Thon, Ingo and Landwehr, Niels and De Raedt, Luc},
  title     = {Stochastic relational processes: Efficient inference and applications},
  journal   = {Machine Learning},
  volume    = {82},
  number    = {2},
  pages     = {239--272},
  year      = {2011},
  publisher = {Springer},
  address   = {Dordrecht},
  issn      = {0885-6125},
  doi       = {10.1007/s10994-010-5213-8},
  language  = {english},
  abstract  = {One of the goals of artificial intelligence is to develop agents that learn and act in complex environments. Realistic environments typically feature a variable number of objects, relations amongst them, and non-deterministic transition behavior. While standard probabilistic sequence models provide efficient inference and learning techniques for sequential data, they typically cannot fully capture the relational complexity. On the other hand, statistical relational learning techniques are often too inefficient to cope with complex sequential data. In this paper, we introduce a simple model that occupies an intermediate position in this expressiveness/efficiency trade-off. It is based on CP-logic (Causal Probabilistic Logic), an expressive probabilistic logic for modeling causality. However, by specializing CP-logic to represent a probability distribution over sequences of relational state descriptions and employing a Markov assumption, inference and learning become more tractable and effective. Specifically, we show how to solve part of the inference and learning problems directly at the first-order level, while transforming the remaining part into the problem of computing all satisfying assignments for a Boolean formula in a binary decision diagram. We experimentally validate that the resulting technique is able to handle probabilistic relational domains with a substantial number of objects and relations.},
}

@article{CiliaLandwehrPasserini2011,
  author    = {Cilia, Elisa and Landwehr, Niels and Passerini, Andrea},
  title     = {Relational feature mining with hierarchical multitask {kFOIL}},
  journal   = {Fundamenta Informaticae},
  volume    = {113},
  number    = {2},
  pages     = {151--177},
  year      = {2011},
  publisher = {IOS Press},
  address   = {Amsterdam},
  issn      = {0169-2968},
  doi       = {10.3233/FI-2011-604},
  language  = {english},
  abstract  = {We introduce hierarchical kFOIL as a simple extension of the multitask kFOIL learning algorithm. The algorithm first learns a core logic representation common to all tasks, and then refines it by specialization on a per-task basis. The approach can be easily generalized to a deeper hierarchy of tasks. A task clustering algorithm is also proposed in order to automatically generate the task hierarchy. The approach is validated on problems of drug-resistance mutation prediction and protein structural classification. Experimental results show the advantage of the hierarchical version over both single and multi task alternatives and its potential usefulness in providing explanatory features for the domain. Task clustering allows to further improve performance when a deeper hierarchy is considered.},
}