% Brill (2022): PhD thesis record. The entry is tagged language en (English title),
% while the abstract text stored here is German. All non-ASCII letters in the
% abstract are written as LaTeX escapes so the entry is safe for classic BibTeX.
@phdthesis{Brill2022,
  author    = {Brill, Fabio Alexander},
  title     = {Applications of machine learning and open geospatial data in flood risk modelling},
  doi       = {10.25932/publishup-55594},
  url       = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-555943},
  school    = {Universit{\"a}t Potsdam},
  pages     = {xix, 124},
  year      = {2022},
  abstract  = {Der technologische Fortschritt erlaubt es, zunehmend komplexe Vorhersagemodelle auf Basis immer gr{\"o}{\ss}erer Datens{\"a}tze zu produzieren. F{\"u}r das Risikomanagement von Naturgefahren sind eine Vielzahl von Modellen als Entscheidungsgrundlage notwendig, z.B. in der Auswertung von Beobachtungsdaten, f{\"u}r die Vorhersage von Gefahrenszenarien, oder zur statistischen Absch{\"a}tzung der zu erwartenden Sch{\"a}den. Es stellt sich also die Frage, inwiefern moderne Modellierungsans{\"a}tze wie das maschinelle Lernen oder Data-Mining in diesem Themenbereich sinnvoll eingesetzt werden k{\"o}nnen. Zus{\"a}tzlich ist im Hinblick auf die Datenverf{\"u}gbarkeit und -zug{\"a}nglichkeit ein Trend zur {\"O}ffnung (open data) zu beobachten. Thema dieser Arbeit ist daher, die M{\"o}glichkeiten und Grenzen des maschinellen Lernens und frei verf{\"u}gbarer Geodaten auf dem Gebiet der Hochwasserrisikomodellierung im weiteren Sinne zu untersuchen. Da dieses {\"u}bergeordnete Thema sehr breit ist, werden einzelne relevante Aspekte herausgearbeitet und detailliert betrachtet. Eine prominente Datenquelle im Bereich Hochwasser ist die satellitenbasierte Kartierung von {\"U}berflutungsfl{\"a}chen, die z.B. {\"u}ber den Copernicus Service der Europ{\"a}ischen Union frei zur Verf{\"u}gung gestellt werden. Gro{\ss}e Hoffnungen werden in der wissenschaftlichen Literatur in diese Produkte gesetzt, sowohl f{\"u}r die akute Unterst{\"u}tzung der Einsatzkr{\"a}fte im Katastrophenfall, als auch in der Modellierung mittels hydrodynamischer Modelle oder zur Schadensabsch{\"a}tzung. Daher wurde ein Fokus in dieser Arbeit auf die Untersuchung dieser Flutmasken gelegt. Aus der Beobachtung, dass die Qualit{\"a}t dieser Produkte in bewaldeten und urbanen Gebieten unzureichend ist, wurde ein Verfahren zur nachtr{\"a}glichen Verbesserung mittels maschinellem Lernen entwickelt.
Das Verfahren basiert auf einem Klassifikationsalgorithmus der nur Trainingsdaten von einer vorherzusagenden Klasse ben{\"o}tigt, im konkreten Fall also Daten von {\"U}berflutungsfl{\"a}chen, nicht jedoch von der negativen Klasse (trockene Gebiete). Die Anwendung f{\"u}r Hurricane Harvey in Houston zeigt gro{\ss}es Potenzial der Methode, abh{\"a}ngig von der Qualit{\"a}t der urspr{\"u}nglichen Flutmaske. Anschlie{\ss}end wird anhand einer prozessbasierten Modellkette untersucht, welchen Einfluss implementierte physikalische Prozessdetails auf das vorhergesagte statistische Risiko haben. Es wird anschaulich gezeigt, was eine Risikostudie basierend auf etablierten Modellen leisten kann. Solche Modellketten sind allerdings bereits f{\"u}r Flusshochwasser sehr komplex, und f{\"u}r zusammengesetzte oder kaskadierende Ereignisse mit Starkregen, Sturzfluten, und weiteren Prozessen, kaum vorhanden. Im vierten Kapitel dieser Arbeit wird daher getestet, ob maschinelles Lernen auf Basis von vollst{\"a}ndigen Schadensdaten einen direkteren Weg zur Schadensmodellierung erm{\"o}glicht, der die explizite Konzeption einer solchen Modellkette umgeht. Dazu wird ein staatlich erhobener Datensatz der gesch{\"a}digten Geb{\"a}ude w{\"a}hrend des schweren El Ni{\~n}o Ereignisses 2017 in Peru verwendet. In diesem Kontext werden auch die M{\"o}glichkeiten des Data-Mining zur Extraktion von Prozessverst{\"a}ndnis ausgelotet. Es kann gezeigt werden, dass diverse frei verf{\"u}gbare Geodaten n{\"u}tzliche Informationen f{\"u}r die Gefahren- und Schadensmodellierung von komplexen Flutereignissen liefern, z.B. satellitenbasierte Regenmessungen, topographische und hydrographische Information, kartierte Siedlungsfl{\"a}chen, sowie Indikatoren aus Spektraldaten. Zudem zeigen sich Erkenntnisse zu den Sch{\"a}digungsprozessen, die im Wesentlichen mit den vorherigen Erwartungen in Einklang stehen.
Die maximale Regenintensit{\"a}t wirkt beispielsweise in St{\"a}dten und steilen Schluchten st{\"a}rker sch{\"a}digend, w{\"a}hrend die Niederschlagssumme in tiefliegenden Flussgebieten und bewaldeten Regionen als aussagekr{\"a}ftiger befunden wurde. L{\"a}ndliche Gebiete in Peru weisen in der pr{\"a}sentierten Studie eine h{\"o}here Vulnerabilit{\"a}t als die Stadtgebiete auf. Jedoch werden auch die grunds{\"a}tzlichen Grenzen der Methodik und die Abh{\"a}ngigkeit von spezifischen Datens{\"a}tzen und Algorithmen offenkundig. In der {\"u}bergreifenden Diskussion werden schlie{\ss}lich die verschiedenen Methoden - prozessbasierte Modellierung, pr{\"a}diktives maschinelles Lernen, und Data-Mining - mit Blick auf die Gesamtfragestellungen evaluiert. Im Bereich der Gefahrenbeobachtung scheint eine Fokussierung auf neue Algorithmen sinnvoll. Im Bereich der Gefahrenmodellierung, insbesondere f{\"u}r Flusshochwasser, wird eher die Verbesserung von physikalischen Modellen, oder die Integration von prozessbasierten und statistischen Verfahren angeraten. In der Schadensmodellierung fehlen nach wie vor die gro{\ss}en repr{\"a}sentativen Datens{\"a}tze, die f{\"u}r eine breite Anwendung von maschinellem Lernen Voraussetzung ist. Daher ist die Verbesserung der Datengrundlage im Bereich der Sch{\"a}den derzeit als wichtiger einzustufen als die Auswahl der Algorithmen.},
  language  = {en}
}
% Bryant (2024): PhD thesis record with an English abstract. Quotation marks in
% the abstract use LaTeX-style opening/closing quotes so they typeset correctly.
@phdthesis{Bryant2024,
  author    = {Bryant, Seth},
  title     = {Aggregation and disaggregation in flood risk models},
  doi       = {10.25932/publishup-65095},
  url       = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-650952},
  school    = {Universit{\"a}t Potsdam},
  pages     = {ix, 116},
  year      = {2024},
  abstract  = {Floods continue to be the leading cause of economic damages and fatalities among natural disasters worldwide. As future climate and exposure changes are projected to intensify these damages, the need for more accurate and scalable flood risk models is rising. Over the past decade, macro-scale flood risk models have evolved from initial proof-of-concepts to indispensable tools for decision-making at global-, national- and, increasingly, the local-level. This progress has been propelled by the advent of high-performance computing and the availability of global, space-based datasets. However, despite such advancements, these models are rarely validated and consistently fall short of the accuracy achieved by high-resolution local models. While capabilities have improved, significant gaps persist in understanding the behaviours of such macro-scale models, particularly their tendency to overestimate risk. This dissertation aims to address such gaps by examining the scale transfers inherent in the construction and application of coarse macro-scale models. To achieve this, four studies are presented that, collectively, address exposure, hazard, and vulnerability components of risk affected by upscaling or downscaling. The first study focuses on a type of downscaling where coarse flood hazard inundation grids are enhanced to a finer resolution. While such inundation downscaling has been employed in numerous global model chains, ours is the first study to focus specifically on this component, providing an evaluation of the state of the art and a novel algorithm. Findings demonstrate that our novel algorithm is eight times faster than existing methods, offers a slight improvement in accuracy, and generates more physically coherent flood maps in hydraulically challenging regions.
When applied to a case study, the algorithm generated a 4m resolution inundation map from 30m hydrodynamic model outputs in 33 s, a 60-fold improvement in runtime with a 25\% increase in RMSE compared with direct hydrodynamic modelling. All evaluated downscaling algorithms yielded better accuracy than the coarse hydrodynamic model when compared to observations, demonstrating similar limits of coarse hydrodynamic models reported by others. The substitution of downscaling into flood risk model chains, in place of high-resolution modelling, can drastically improve the lead time of impact-based forecasts and the efficiency of hazard map production. With downscaling, local regions could obtain high resolution local inundation maps by post-processing a global model without the need for expensive modelling or expertise. The second study focuses on hazard aggregation and its implications for exposure, investigating implicit aggregations commonly used to intersect hazard grids with coarse exposure models. This research introduces a novel spatial classification framework to understand the effects of rescaling flood hazard grids to a coarser resolution. The study derives closed-form analytical solutions for the location and direction of bias from flood grid aggregation, showing that bias will always be present in regions near the edge of inundation. For example, inundation area will be positively biased when water depth grids are aggregated, while volume will be negatively biased when water elevation grids are aggregated. Extending the analysis to effects of hazard aggregation on building exposure, this study shows that exposure in regions at the edge of inundation are an order of magnitude more sensitive to aggregation errors than hazard alone. Among the two aggregation routines considered, averaging water surface elevation grids better preserved flood depths at buildings than averaging of water depth grids.
The study provides the first mathematical proof and generalizable treatment of flood hazard grid aggregation, demonstrating important mechanisms to help flood risk modellers understand and control model behaviour. The final two studies focus on the aggregation of vulnerability models or flood damage functions, investigating the practice of applying per-asset functions to aggregate exposure models. Both studies extend Jensen's inequality, a well-known 1906 mathematical proof, to demonstrate how the aggregation of flood damage functions leads to bias. Applying Jensen's proof in this new context, results show that typically concave flood damage functions will introduce a positive bias (overestimation) when aggregated. This behaviour was further investigated with a simulation experiment including 2 million buildings in Germany, four global flood hazard simulations and three aggregation scenarios. The results show that positive aggregation bias is not distributed evenly in space, meaning some regions identified as ``hot spots of risk'' in assessments may in fact just be hot spots of aggregation bias. This study provides the first application of Jensen's inequality to explain the overestimates reported elsewhere and advice for modellers to minimize such artifacts. In total, this dissertation investigates the complex ways aggregation and disaggregation influence the behaviour of risk models, focusing on the scale-transfers underpinning macro-scale flood risk assessments. Extending a key finding of the flood hazard literature to the broader context of flood risk, this dissertation concludes that all else equal, coarse models overestimate risk. This dissertation goes beyond previous studies by providing mathematical proofs for how and where such bias emerges in aggregation routines, offering a mechanistic explanation for coarse model overestimates.
It shows that this bias is spatially heterogeneous, necessitating a deep understanding of how rescaling may bias models to effectively reduce or communicate uncertainties. Further, the dissertation offers specific recommendations to help modellers minimize scale transfers in problematic regions. In conclusion, I argue that such aggregation errors are epistemic, stemming from choices in model structure, and therefore hold greater potential and impetus for study and mitigation. This deeper understanding of uncertainties is essential for improving macro-scale flood risk models and their effectiveness in equitable, holistic, and sustainable flood management.},
  language  = {en}
}