@phdthesis{Koehler2009,
  author    = {K{\"o}hler, Andreas},
  title     = {Recognition and investigation of temporal patterns in seismic wavefields using unsupervised learning techniques},
  url       = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-29702},
  school      = {Universit{\"a}t Potsdam},
  year      = {2009},
  abstract  = {Modern acquisition of seismic data on receiver networks worldwide produces an increasing amount of continuous wavefield recordings. Hence, in addition to manual data inspection, seismogram interpretation requires new processing utilities for event detection, signal classification and data visualization. Various machine learning algorithms, which can be adapted to seismological problems, have been suggested in the field of pattern recognition. This can be done either by means of supervised learning using manually defined training data or by unsupervised clustering and visualization. The latter allows the recognition of wavefield patterns, such as short-term transients and long-term variations, with a minimum of domain knowledge. Besides classical earthquake seismology, investigations of temporal patterns in seismic data also concern novel approaches such as noise cross-correlation or ambient seismic vibration analysis in general, which have moved into focus within the last decade. In order to find records suitable for the respective approach or simply for quality control, unsupervised preprocessing becomes important and valuable for large data sets. Machine learning techniques require the parametrization of the data using feature vectors. Applied to seismic recordings, wavefield properties have to be computed from the raw seismograms. For an unsupervised approach, all potential wavefield features have to be considered to reduce subjectivity to a minimum. Furthermore, automatic dimensionality reduction, i.e. feature selection, is required in order to decrease computational cost, enhance interpretability and improve discriminative power. This study presents an unsupervised feature selection and learning approach for the discovery, imaging and interpretation of significant temporal patterns in seismic single-station or network recordings. In particular, techniques permitting an intuitive, quickly interpretable and concise overview of available records are suggested. For this purpose, the data is parametrized by real-valued feature vectors for short time windows using standard seismic analysis tools as feature generation methods, such as frequency-wavenumber, polarization, and spectral analysis. The choice of the time window length is dependent on the expected durations of patterns to be recognized or discriminated. We use Self-Organizing Maps (SOMs) for a data-driven feature selection, visualization and clustering procedure, which is particularly suitable for high-dimensional data sets. Using synthetics composed of Rayleigh and Love waves and three different types of real-world data sets, we show the robustness and reliability of our unsupervised learning approach with respect to the effect of algorithm parameters and data set properties. Furthermore, we approve the capability of the clustering and imaging techniques. For all data, we find improved discriminative power of our feature selection procedure compared to feature subsets manually selected from individual wavefield parametrization methods. In particular, enhanced performance is observed compared to the most favorable individual feature generation method, which is found to be the frequency spectrum. The method is applied to regional earthquake records at the European Broadband Network with the aim to define suitable features for earthquake detection and seismic phase classification. For the latter, we find that a combination of spectral and polarization features favor S wave detection at a single receiver. However, SOM-based visualization of phase discrimination shows that clustering applied to the records of two stations only allows onset or P wave detection, respectively. In order to improve the discrimination of S waves on receiver networks, we recommend to consider additionally the temporal context of feature vectors. The application to continuous recordings of seismicity close to an active volcano (Mount Merapi, Java, Indonesia) shows that two typical volcano-seismic events (VTB and Guguran) can be detected and distinguished by clustering. In contrast, so-called MP events cannot be discriminated. Comparable results are obtained for selected features and recognition rates regarding a previously implemented supervised classification system. Finally, we test the reliability of wavefield clustering to improve common ambient vibration analysis methods such as estimation of dispersion curves and horizontal to vertical spectral ratios. It is found, that in general, the identified short- and long-term patterns have no significant impact on those estimates. However, for individual sites, effects of local sources can be identified. Leaving out the corresponding clusters, yields reduced uncertainties or allows for improving estimation of dispersion curves.},
  language  = {en}
}