% PhD thesis, Universitaet Potsdam, 2022. The abstract is in German; the sharp s
% is written with its LaTeX escape, matching the escaped umlauts in this entry.
@phdthesis{Brill2022,
  author    = {Brill, Fabio Alexander},
  title     = {Applications of machine learning and open geospatial data in flood risk modelling},
  doi       = {10.25932/publishup-55594},
  url       = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-555943},
  school    = {Universit{\"a}t Potsdam},
  pages     = {xix, 124},
  year      = {2022},
  abstract  = {Der technologische Fortschritt erlaubt es, zunehmend komplexe Vorhersagemodelle auf Basis immer gr{\"o}{\ss}erer Datens{\"a}tze zu produzieren. F{\"u}r das Risikomanagement von Naturgefahren sind eine Vielzahl von Modellen als Entscheidungsgrundlage notwendig, z.B. in der Auswertung von Beobachtungsdaten, f{\"u}r die Vorhersage von Gefahrenszenarien, oder zur statistischen Absch{\"a}tzung der zu erwartenden Sch{\"a}den. Es stellt sich also die Frage, inwiefern moderne Modellierungsans{\"a}tze wie das maschinelle Lernen oder Data-Mining in diesem Themenbereich sinnvoll eingesetzt werden k{\"o}nnen. Zus{\"a}tzlich ist im Hinblick auf die Datenverf{\"u}gbarkeit und -zug{\"a}nglichkeit ein Trend zur {\"O}ffnung (open data) zu beobachten. Thema dieser Arbeit ist daher, die M{\"o}glichkeiten und Grenzen des maschinellen Lernens und frei verf{\"u}gbarer Geodaten auf dem Gebiet der Hochwasserrisikomodellierung im weiteren Sinne zu untersuchen. Da dieses {\"u}bergeordnete Thema sehr breit ist, werden einzelne relevante Aspekte herausgearbeitet und detailliert betrachtet. Eine prominente Datenquelle im Bereich Hochwasser ist die satellitenbasierte Kartierung von {\"U}berflutungsfl{\"a}chen, die z.B. {\"u}ber den Copernicus Service der Europ{\"a}ischen Union frei zur Verf{\"u}gung gestellt werden. Gro{\ss}e Hoffnungen werden in der wissenschaftlichen Literatur in diese Produkte gesetzt, sowohl f{\"u}r die akute Unterst{\"u}tzung der Einsatzkr{\"a}fte im Katastrophenfall, als auch in der Modellierung mittels hydrodynamischer Modelle oder zur Schadensabsch{\"a}tzung. Daher wurde ein Fokus in dieser Arbeit auf die Untersuchung dieser Flutmasken gelegt. Aus der Beobachtung, dass die Qualit{\"a}t dieser Produkte in bewaldeten und urbanen Gebieten unzureichend ist, wurde ein Verfahren zur nachtr{\"a}glichen Verbesserung mittels maschinellem Lernen entwickelt.
Das Verfahren basiert auf einem Klassifikationsalgorithmus der nur Trainingsdaten von einer vorherzusagenden Klasse ben{\"o}tigt, im konkreten Fall also Daten von {\"U}berflutungsfl{\"a}chen, nicht jedoch von der negativen Klasse (trockene Gebiete). Die Anwendung f{\"u}r Hurricane Harvey in Houston zeigt gro{\ss}es Potenzial der Methode, abh{\"a}ngig von der Qualit{\"a}t der urspr{\"u}nglichen Flutmaske. Anschlie{\ss}end wird anhand einer prozessbasierten Modellkette untersucht, welchen Einfluss implementierte physikalische Prozessdetails auf das vorhergesagte statistische Risiko haben. Es wird anschaulich gezeigt, was eine Risikostudie basierend auf etablierten Modellen leisten kann. Solche Modellketten sind allerdings bereits f{\"u}r Flusshochwasser sehr komplex, und f{\"u}r zusammengesetzte oder kaskadierende Ereignisse mit Starkregen, Sturzfluten, und weiteren Prozessen, kaum vorhanden. Im vierten Kapitel dieser Arbeit wird daher getestet, ob maschinelles Lernen auf Basis von vollst{\"a}ndigen Schadensdaten einen direkteren Weg zur Schadensmodellierung erm{\"o}glicht, der die explizite Konzeption einer solchen Modellkette umgeht. Dazu wird ein staatlich erhobener Datensatz der gesch{\"a}digten Geb{\"a}ude w{\"a}hrend des schweren El Ni{\~n}o Ereignisses 2017 in Peru verwendet. In diesem Kontext werden auch die M{\"o}glichkeiten des Data-Mining zur Extraktion von Prozessverst{\"a}ndnis ausgelotet. Es kann gezeigt werden, dass diverse frei verf{\"u}gbare Geodaten n{\"u}tzliche Informationen f{\"u}r die Gefahren- und Schadensmodellierung von komplexen Flutereignissen liefern, z.B. satellitenbasierte Regenmessungen, topographische und hydrographische Information, kartierte Siedlungsfl{\"a}chen, sowie Indikatoren aus Spektraldaten. Zudem zeigen sich Erkenntnisse zu den Sch{\"a}digungsprozessen, die im Wesentlichen mit den vorherigen Erwartungen in Einklang stehen.
Die maximale Regenintensit{\"a}t wirkt beispielsweise in St{\"a}dten und steilen Schluchten st{\"a}rker sch{\"a}digend, w{\"a}hrend die Niederschlagssumme in tiefliegenden Flussgebieten und bewaldeten Regionen als aussagekr{\"a}ftiger befunden wurde. L{\"a}ndliche Gebiete in Peru weisen in der pr{\"a}sentierten Studie eine h{\"o}here Vulnerabilit{\"a}t als die Stadtgebiete auf. Jedoch werden auch die grunds{\"a}tzlichen Grenzen der Methodik und die Abh{\"a}ngigkeit von spezifischen Datens{\"a}tzen und Algorithmen offenkundig. In der {\"u}bergreifenden Diskussion werden schlie{\ss}lich die verschiedenen Methoden - prozessbasierte Modellierung, pr{\"a}diktives maschinelles Lernen, und Data-Mining - mit Blick auf die Gesamtfragestellungen evaluiert. Im Bereich der Gefahrenbeobachtung scheint eine Fokussierung auf neue Algorithmen sinnvoll. Im Bereich der Gefahrenmodellierung, insbesondere f{\"u}r Flusshochwasser, wird eher die Verbesserung von physikalischen Modellen, oder die Integration von prozessbasierten und statistischen Verfahren angeraten. In der Schadensmodellierung fehlen nach wie vor die gro{\ss}en repr{\"a}sentativen Datens{\"a}tze, die f{\"u}r eine breite Anwendung von maschinellem Lernen Voraussetzung ist. Daher ist die Verbesserung der Datengrundlage im Bereich der Sch{\"a}den derzeit als wichtiger einzustufen als die Auswahl der Algorithmen.},
  language  = {en}
}
% PhD thesis, Universitaet Potsdam, 2022. The acronym MIR is brace-protected in
% the title so that sentence-casing bibliography styles do not lowercase it.
@phdthesis{Esfahani2022,
  author    = {Esfahani, Reza Dokht Dolatabadi},
  title     = {Time-dependent monitoring of near-surface and ground motion modelling: developing new data processing approaches based on Music Information Retrieval ({MIR}) strategies},
  doi       = {10.25932/publishup-56767},
  url       = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-567671},
  school    = {Universit{\"a}t Potsdam},
  pages     = {xiv, 107},
  year      = {2022},
  abstract  = {Seismology, like many scientific fields, e.g., music information retrieval and speech signal processing, is experiencing exponential growth in the amount of data acquired by modern seismological networks. In this thesis, I take advantage of the opportunities offered by ``big data'' and by the methods developed in the areas of music information retrieval and machine learning to predict better the ground motion generated by earthquakes and to study the properties of the surface layers of the Earth. In order to better predict seismic ground motions, I propose two approaches based on unsupervised deep learning methods, an autoencoder network and Generative Adversarial Networks. The autoencoder technique explores a massive amount of ground motion data, evaluates the required parameters, and generates synthetic ground motion data in the Fourier amplitude spectra (FAS) domain. This method is tested on two synthetic datasets and one real dataset. The application on the real dataset shows that the substantial information contained within the FAS data can be encoded to a four to the five-dimensional manifold. Consequently, only a few independent parameters are required for efficient ground motion prediction. I also propose a method based on Conditional Generative Adversarial Networks (CGAN) for simulating ground motion records in the time-frequency and time domains. CGAN generates the time-frequency domains based on the parameters: magnitude, distance, and shear wave velocities to 30 m depth (VS30). After generating the amplitude of the time-frequency domains using the CGAN model, instead of classical conventional methods that assume the amplitude spectra with a random phase spectrum, the phase of the time-frequency domains is recovered by minimizing the observed and reconstructed spectrograms. In the second part of this dissertation, I propose two methods for the monitoring and characterization of near-surface materials and site effect analyses.
I implement an autocorrelation function and an interferometry method to monitor the velocity changes of near-surface materials resulting from the Kumamoto earthquake sequence (Japan, 2016). The observed seismic velocity changes during the strong shaking are due to the non-linear response of the near-surface materials. The results show that the velocity changes lasted for about two months after the Kumamoto mainshock. Furthermore, I used the velocity changes to evaluate the in-situ strain-stress relationship. I also propose a method for assessing the site proxy ``VS30'' using non-invasive analysis. In the proposed method, a dispersion curve of surface waves is inverted to estimate the shear wave velocity of the subsurface. This method is based on the Dix-like linear operators, which relate the shear wave velocity to the phase velocity. The proposed method is fast, efficient, and stable. All of the methods presented in this work can be used for processing ``big data'' in seismology and for the analysis of weak and strong ground motion data, to predict ground shaking, and to analyze site responses by considering potential time dependencies and nonlinearities.},
  language  = {en}
}
% Repository copy of a paper on Sentinel-2 MSI pixel classification (2016).
% NOTE(review): standard styles ignore the journal field on a misc entry;
% confirm whether this record should be an article with volume and number.
@misc{HollsteinSeglGuanteretal.2016,
  author    = {Hollstein, Andr{\'e} and Segl, Karl and Guanter, Luis and Brell, Maximilian and Enesco, Marta},
  title     = {Ready-to-Use methods for the detection of clouds, cirrus, snow, shadow, water and clear sky pixels in {Sentinel-2} {MSI} images},
  series    = {remote sensing},
  journal   = {remote sensing},
  url       = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-407938},
  pages     = {18},
  year      = {2016},
  abstract  = {Classification of clouds, cirrus, snow, shadows and clear sky areas is a crucial step in the pre-processing of optical remote sensing images and is a valuable input for their atmospheric correction. The Multi-Spectral Imager on board the Sentinel-2's of the Copernicus program offers optimized bands for this task and delivers unprecedented amounts of data regarding spatial sampling, global coverage, spectral coverage, and repetition rate. Efficient algorithms are needed to process, or possibly reprocess, those big amounts of data. Techniques based on top-of-atmosphere reflectance spectra for single-pixels without exploitation of external data or spatial context offer the largest potential for parallel data processing and highly optimized processing throughput. Such algorithms can be seen as a baseline for possible trade-offs in processing performance when the application of more sophisticated methods is discussed. We present several ready-to-use classification algorithms which are all based on a publicly available database of manually classified Sentinel-2A images. These algorithms are based on commonly used and newly developed machine learning techniques which drastically reduce the amount of time needed to update the algorithms when new images are added to the database. Several ready-to-use decision trees are presented which allow to correctly label about 91\% of the spectra within a validation dataset. While decision trees are simple to implement and easy to understand, they offer only limited classification skill. It improves to 98\% when the presented algorithm based on the classical Bayesian method is applied. This method has only recently been used for this task and shows excellent performance concerning classification skill and processing performance. A comparison of the presented algorithms with other commonly used techniques such as random forests, stochastic gradient descent, or support vector machines is also given.
Especially random forests and support vector machines show similar classification skill as the classical Bayesian method.},
  language  = {en}
}
% Journal article (2022). The place name Groningen is brace-protected in the
% title so that sentence-casing bibliography styles keep its capital letter.
@article{KuehnHainzlDahmetal.2022,
  author    = {K{\"u}hn, Daniela and Hainzl, Sebastian and Dahm, Torsten and Richter, Gudrun and Vera Rodriguez, Ismael},
  title     = {A review of source models to further the understanding of the seismicity of the {Groningen} field},
  series    = {Netherlands journal of geosciences : NJG},
  volume    = {101},
  journal   = {Netherlands journal of geosciences : NJG},
  publisher = {Cambridge Univ. Press},
  address   = {Cambridge},
  issn      = {0016-7746},
  doi       = {10.1017/njg.2022.7},
  pages     = {12},
  year      = {2022},
  abstract  = {The occurrence of felt earthquakes due to gas production in Groningen has initiated numerous studies and model attempts to understand and quantify induced seismicity in this region. The whole bandwidth of available models spans the range from fully deterministic models to purely empirical and stochastic models. In this article, we summarise the most important model approaches, describing their main achievements and limitations. In addition, we discuss remaining open questions and potential future directions of development.},
  language  = {en}
}
% PhD thesis, Universitaet Potsdam, 2024. Identifying fields come first and the
% long abstract last; all field values are unchanged.
@phdthesis{Lilienkamp2024,
  author    = {Lilienkamp, Henning},
  title     = {Enhanced computational approaches for data-driven characterization of earthquake ground motion and rapid earthquake impact assessment},
  school    = {Universit{\"a}t Potsdam},
  year      = {2024},
  pages     = {x, 145},
  doi       = {10.25932/publishup-63195},
  url       = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-631954},
  language  = {en},
  abstract  = {Rapidly growing seismic and macroseismic databases and simplified access to advanced machine learning methods have in recent years opened up vast opportunities to address challenges in engineering and strong motion seismology from novel, datacentric perspectives. In this thesis, I explore the opportunities of such perspectives for the tasks of ground motion modeling and rapid earthquake impact assessment, tasks with major implications for long-term earthquake disaster mitigation. In my first study, I utilize the rich strong motion database from the Kanto basin, Japan, and apply the U-Net artificial neural network architecture to develop a deep learning based ground motion model. The operational prototype provides statistical estimates of expected ground shaking, given descriptions of a specific earthquake source, wave propagation paths, and geophysical site conditions. The U-Net interprets ground motion data in its spatial context, potentially taking into account, for example, the geological properties in the vicinity of observation sites. Predictions of ground motion intensity are thereby calibrated to individual observation sites and earthquake locations. The second study addresses the explicit incorporation of rupture forward directivity into ground motion modeling. Incorporation of this phenomenon, causing strong, pulse like ground shaking in the vicinity of earthquake sources, is usually associated with an intolerable increase in computational demand during probabilistic seismic hazard analysis (PSHA) calculations. I suggest an approach in which I utilize an artificial neural network to efficiently approximate the average, directivity-related adjustment to ground motion predictions for earthquake ruptures from the 2022 New Zealand National Seismic Hazard Model. The practical implementation in an actual PSHA calculation demonstrates the efficiency and operational readiness of my model.
In a follow-up study, I present a proof of concept for an alternative strategy in which I target the generalizing applicability to ruptures other than those from the New Zealand National Seismic Hazard Model. In the third study, I address the usability of pseudo-intensity reports obtained from macroseismic observations by non-expert citizens for rapid impact assessment. I demonstrate that the statistical properties of pseudo-intensity collections describing the intensity of shaking are correlated with the societal impact of earthquakes. In a second step, I develop a probabilistic model that, within minutes of an event, quantifies the probability of an earthquake to cause considerable societal impact. Under certain conditions, such a quick and preliminary method might be useful to support decision makers in their efforts to organize auxiliary measures for earthquake disaster response while results from more elaborate impact assessment frameworks are not yet available. The application of machine learning methods to datasets that only partially reveal characteristics of Big Data, qualify the majority of results obtained in this thesis as explorative insights rather than ready-to-use solutions to real world problems. The practical usefulness of this work will be better assessed in the future by applying the approaches developed to growing and increasingly complex data sets.}
}
% Journal article (2020). Venue and numbering fields are grouped after the title
% and the long abstract comes last; all field values are unchanged.
@article{SchmidtHesseAttingeretal.2020,
  author    = {Schmidt, Lennart and Hesse, Falk and Attinger, Sabine and Kumar, Rohini},
  title     = {Challenges in applying machine learning models for hydrological inference},
  journal   = {Water resources research},
  series    = {Water resources research},
  volume    = {56},
  number    = {5},
  pages     = {10},
  year      = {2020},
  publisher = {American Geophysical Union},
  address   = {Washington},
  issn      = {0043-1397},
  doi       = {10.1029/2019WR025924},
  language  = {en},
  abstract  = {Machine learning (ML) algorithms are being increasingly used in Earth and Environmental modeling studies owing to the ever-increasing availability of diverse data sets and computational resources as well as advancement in ML algorithms. Despite advances in their predictive accuracy, the usefulness of ML algorithms for inference remains elusive. In this study, we employ two popular ML algorithms, artificial neural networks and random forest, to analyze a large data set of flood events across Germany with the goals to analyze their predictive accuracy and their usability to provide insights to hydrologic system functioning. The results of the ML algorithms are contrasted against a parametric approach based on multiple linear regression. For analysis, we employ a model-agnostic framework named Permuted Feature Importance to derive the influence of models' predictors. This allows us to compare the results of different algorithms for the first time in the context of hydrology. Our main findings are that (1) the ML models achieve higher prediction accuracy than linear regression, (2) the results reflect basic hydrological principles, but (3) further inference is hindered by the heterogeneity of results across algorithms. Thus, we conclude that the problem of equifinality as known from classical hydrological modeling also exists for ML and severely hampers its potential for inference. To account for the observed problems, we propose that when employing ML for inference, this should be made by using multiple algorithms and multiple methods, of which the latter should be embedded in a cross-validation routine.}
}
% Journal article (2021). Venue and numbering fields are grouped after the title
% and the long abstract comes last; all field values are unchanged.
@article{SteinbergVasyuraBathkeGaebleretal.2021,
  author    = {Steinberg, Andreas and Vasyura-Bathke, Hannes and Gaebler, Peter Jost and Ohrnberger, Matthias and Ceranna, Lars},
  title     = {Estimation of seismic moment tensors using variational inference machine learning},
  journal   = {Journal of geophysical research : Solid earth},
  series    = {Journal of geophysical research : Solid earth},
  volume    = {126},
  number    = {10},
  pages     = {16},
  year      = {2021},
  publisher = {American Geophysical Union},
  address   = {Washington},
  issn      = {2169-9313},
  doi       = {10.1029/2021JB022685},
  language  = {en},
  abstract  = {We present an approach for rapidly estimating full moment tensors of earthquakes and their parameter uncertainties based on short time windows of recorded seismic waveform data by considering deep learning of Bayesian Neural Networks (BNNs). The individual neural networks are trained on synthetic seismic waveform data and corresponding known earthquake moment-tensor parameters. A monitoring volume has been predefined to form a three-dimensional grid of locations and to train a BNN for each grid point. Variational inference on several of these networks allows us to consider several sources of error and how they affect the estimated full moment-tensor parameters and their uncertainties. In particular, we demonstrate how estimated parameter distributions are affected by uncertainties in the earthquake centroid location in space and time as well as in the assumed Earth structure model. We apply our approach as a proof of concept on seismic waveform recordings of aftershocks of the Ridgecrest 2019 earthquake with moment magnitudes ranging from Mw 2.7 to Mw 5.5. Overall, good agreement has been achieved between inferred parameter ensembles and independently estimated parameters using classical methods. Our developed approach is fast and robust, and therefore, suitable for down-stream analyses that need rapid estimates of the source mechanism for a large number of earthquakes.}
}
% PhD thesis, Universitaet Potsdam, 2023. The acronym MIR is brace-protected in
% the title so that sentence-casing bibliography styles do not lowercase it.
@phdthesis{Zali2023,
  author    = {Zali, Zahra},
  title     = {Volcanic tremor analysis based on advanced signal processing concepts including music information retrieval ({MIR}) strategies},
  doi       = {10.25932/publishup-61086},
  url       = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-610866},
  school    = {Universit{\"a}t Potsdam},
  pages     = {viii, 95},
  year      = {2023},
  abstract  = {Volcanoes are one of the Earth's most dynamic zones and responsible for many changes in our planet. Volcano seismology aims to provide an understanding of the physical processes in volcanic systems and anticipate the style and timing of eruptions by analyzing the seismic records. Volcanic tremor signals are usually observed in the seismic records before or during volcanic eruptions. Their analysis contributes to evaluate the evolving volcanic activity and potentially predict eruptions. Years of continuous seismic monitoring now provide useful information for operational eruption forecasting. The continuously growing amount of seismic recordings, however, poses a challenge for analysis, information extraction, and interpretation, to support timely decision making during volcanic crises. Furthermore, the complexity of eruption processes and precursory activities makes the analysis challenging. A challenge in studying seismic signals of volcanic origin is the coexistence of transient signal swarms and long-lasting volcanic tremor signals. Separating transient events from volcanic tremors can, therefore, contribute to improving our understanding of the underlying physical processes. Some similar issues (data reduction, source separation, extraction, and classification) are addressed in the context of music information retrieval (MIR). The signal characteristics of acoustic and seismic recordings comprise a number of similarities. This thesis is going beyond classical signal analysis techniques usually employed in seismology by exploiting similarities of seismic and acoustic signals and building the information retrieval strategy on the expertise developed in the field of MIR. First, inspired by the idea of harmonic-percussive separation (HPS) in musical signal processing, I have developed a method to extract harmonic volcanic tremor signals and to detect transient events from seismic recordings.
This provides a clean tremor signal suitable for tremor investigation along with a characteristic function suitable for earthquake detection. Second, using HPS algorithms, I have developed a noise reduction technique for seismic signals. This method is especially useful for denoising ocean bottom seismometers, which are highly contaminated by noise. The advantage of this method compared to other denoising techniques is that it doesn't introduce distortion to the broadband earthquake waveforms, which makes it reliable for different applications in passive seismological analysis. Third, to address the challenge of extracting information from high-dimensional data and investigating the complex eruptive phases, I have developed an advanced machine learning model that results in a comprehensive signal processing scheme for volcanic tremors. Using this method seismic signatures of major eruptive phases can be automatically detected. This helps to provide a chronology of the volcanic system. Also, this model is capable to detect weak precursory volcanic tremors prior to the eruption, which could be used as an indicator of imminent eruptive activity. The extracted patterns of seismicity and their temporal variations finally provide an explanation for the transition mechanism between eruptive phases.},
  language  = {en}
}