% Bach (2013), PhD thesis. The url field is a URN resolver link (nbn-resolving.de).
% Compound modifiers in the abstract use a single hyphen; a doubled hyphen would
% typeset an en dash in LaTeX. The abstract wording is otherwise left as exported.
@phdthesis{Bach2013,
  author    = {Bach, Christoph},
  title     = {Improving statistical seismicity models},
  school    = {Universit{\"a}t Potsdam},
  year      = {2013},
  url       = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-70591},
  language  = {en},
  abstract  = {Several mechanisms are proposed to be part of the earthquake triggering process, including static stress interactions and dynamic stress transfer. Significant differences of these mechanisms are particularly expected in the spatial distribution of aftershocks. However, testing the different hypotheses is challenging because it requires the consideration of the large uncertainties involved in stress calculations as well as the appropriate consideration of secondary aftershock triggering which is related to stress changes induced by smaller pre- and aftershocks. In order to evaluate the forecast capability of different mechanisms, I take the effect of smaller-magnitude earthquakes into account by using the epidemic type aftershock sequence (ETAS) model where the spatial probability distribution of direct aftershocks, if available, is correlated to alternative source information and mechanisms. Surface shaking, rupture geometry, and slip distributions are tested. As an approximation of the shaking level, ShakeMaps are used which are available in near real-time after a mainshock and thus could be used for first-order forecasts of the spatial aftershock distribution. Alternatively, the use of empirical decay laws related to minimum fault distance is tested and Coulomb stress change calculations based on published and random slip models. For comparison, the likelihood values of the different model combinations are analyzed in the case of several well-known aftershock sequences (1992 Landers, 1999 Hector Mine, 2004 Parkfield). The tests show that the fault geometry is the most valuable information for improving aftershock forecasts. Furthermore, they reveal that static stress maps can additionally improve the forecasts of off-fault aftershock locations, while the integration of ground shaking data could not upgrade the results significantly. In the second part of this work, I focused on a procedure to test the information content of inverted slip models. 
This allows to quantify the information gain if this kind of data is included in aftershock forecasts. For this purpose, the ETAS model based on static stress changes, which is introduced in part one, is applied. The forecast ability of the models is systematically tested for several earthquake sequences and compared to models using random slip distributions. The influence of subfault resolution and segment strike and dip is tested. Some of the tested slip models perform very good, in that cases almost no random slip models are found to perform better. Contrastingly, for some of the published slip models, almost all random slip models perform better than the published slip model. Choosing a different subfault resolution hardly influences the result, as long the general slip pattern is still reproducible. Whereas different strike and dip values strongly influence the results depending on the standard deviation chosen, which is applied in the process of randomly selecting the strike and dip values.},
}
% Haider (2013), PhD thesis. The url field is a URN resolver link (nbn-resolving.de).
% Short bibliographic fields come first; the long abstract is kept last.
@phdthesis{Haider2013,
  author    = {Haider, Peter},
  title     = {Prediction with Mixture Models},
  school    = {Universit{\"a}t Potsdam},
  year      = {2013},
  url       = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-69617},
  language  = {en},
  abstract  = {Learning a model for the relationship between the attributes and the annotated labels of data examples serves two purposes. Firstly, it enables the prediction of the label for examples without annotation. Secondly, the parameters of the model can provide useful insights into the structure of the data. If the data has an inherent partitioned structure, it is natural to mirror this structure in the model. Such mixture models predict by combining the individual predictions generated by the mixture components which correspond to the partitions in the data. Often the partitioned structure is latent, and has to be inferred when learning the mixture model. Directly evaluating the accuracy of the inferred partition structure is, in many cases, impossible because the ground truth cannot be obtained for comparison. However it can be assessed indirectly by measuring the prediction accuracy of the mixture model that arises from it. This thesis addresses the interplay between the improvement of predictive accuracy by uncovering latent cluster structure in data, and further addresses the validation of the estimated structure by measuring the accuracy of the resulting predictive model. In the application of filtering unsolicited emails, the emails in the training set are latently clustered into advertisement campaigns. Uncovering this latent structure allows filtering of future emails with very low false positive rates. In order to model the cluster structure, a Bayesian clustering model for dependent binary features is developed in this thesis. Knowing the clustering of emails into campaigns can also aid in uncovering which emails have been sent on behalf of the same network of captured hosts, so-called botnets. This association of emails to networks is another layer of latent clustering. 
Uncovering this latent structure allows service providers to further increase the accuracy of email filtering and to effectively defend against distributed denial-of-service attacks. To this end, a discriminative clustering model is derived in this thesis that is based on the graph of observed emails. The partitionings inferred using this model are evaluated through their capacity to predict the campaigns of new emails. Furthermore, when classifying the content of emails, statistical information about the sending server can be valuable. Learning a model that is able to make use of it requires training data that includes server statistics. In order to also use training data where the server statistics are missing, a model that is a mixture over potentially all substitutions thereof is developed. Another application is to predict the navigation behavior of the users of a website. Here, there is no a priori partitioning of the users into clusters, but to understand different usage scenarios and design different layouts for them, imposing a partitioning is necessary. The presented approach simultaneously optimizes the discriminative as well as the predictive power of the clusters. Each model is evaluated on real-world data and compared to baseline methods. The results show that explicitly modeling the assumptions about the latent cluster structure leads to improved predictions compared to the baselines. It is beneficial to incorporate a small number of hyperparameters that can be tuned to yield the best predictions in cases where the prediction accuracy can not be optimized directly.},
}