@article{HempelAdolphsLandwehretal.2020, author = {Hempel, Sabrina and Adolphs, Julian and Landwehr, Niels and Janke, David and Amon, Thomas}, title = {How the selection of training data and modeling approach affects the estimation of ammonia emissions from a naturally ventilated dairy barn—classical statistics versus machine learning}, series = {Sustainability}, volume = {12}, journal = {Sustainability}, number = {3}, publisher = {MDPI}, address = {Basel}, issn = {2071-1050}, doi = {10.3390/su12031030}, pages = {18}, year = {2020}, abstract = {Environmental protection efforts can only be effective in the long term with a reliable quantification of pollutant gas emissions as a first step to mitigation. Measurement and analysis strategies must permit the accurate extrapolation of emission values. We systematically analyzed the added value of applying modern machine learning methods in the process of monitoring emissions from naturally ventilated livestock buildings to the atmosphere. We considered almost 40 weeks of hourly emission values from a naturally ventilated dairy cattle barn in Northern Germany. We compared model predictions using 27 different scenarios of temporal sampling, multiple measures of model accuracy, and eight different regression approaches. The error of the predicted emission values with the tested measurement protocols was, on average, well below 20\%. The sensitivity of the prediction to the selected training dataset was worse for the ordinary multilinear regression. Gradient boosting and random forests provided the most accurate and robust emission value predictions, accompanied by the second-smallest model errors. Most of the highly ranked scenarios involved six measurement periods, while the scenario with the best overall performance was: One measurement period in summer and three in the transition periods, each lasting for 14 days.}, language = {en} }