@phdthesis{Jaeger2018,
  author    = {Jaeger, David},
  title     = {Enabling Big Data security analytics for advanced network attack detection},
  doi       = {10.25932/publishup-43571},
  url       = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-435713},
  school      = {Universit{\"a}t Potsdam},
  pages     = {XVII, 201, XXXIII},
  year      = {2018},
  abstract  = {The last years have shown an increasing sophistication of attacks against enterprises. Traditional security solutions like firewalls, anti-virus systems and generally Intrusion Detection Systems (IDSs) are no longer sufficient to protect an enterprise against these advanced attacks. One popular approach to tackle this issue is to collect and analyze events generated across the IT landscape of an enterprise. This task is achieved by the utilization of Security Information and Event Management (SIEM) systems. However, the majority of the currently existing SIEM solutions is not capable of handling the massive volume of data and the diversity of event representations. Even if these solutions can collect the data at a central place, they are neither able to extract all relevant information from the events nor correlate events across various sources. Hence, only rather simple attacks are detected, whereas complex attacks, consisting of multiple stages, remain undetected. Undoubtedly, security operators of large enterprises are faced with a typical Big Data problem. In this thesis, we propose and implement a prototypical SIEM system named Real-Time Event Analysis and Monitoring System (REAMS) that addresses the Big Data challenges of event data with common paradigms, such as data normalization, multi-threading, in-memory storage, and distributed processing. In particular, a mostly stream-based event processing workflow is proposed that collects, normalizes, persists and analyzes events in near real-time. In this regard, we have made various contributions in the SIEM context. First, we propose a high-performance normalization algorithm that is highly parallelized across threads and distributed across nodes. Second, we are persisting into an in-memory database for fast querying and correlation in the context of attack detection. Third, we propose various analysis layers, such as anomaly- and signature-based detection, that run on top of the normalized and correlated events. As a result, we demonstrate our capabilities to detect previously known as well as unknown attack patterns. Lastly, we have investigated the integration of cyber threat intelligence (CTI) into the analytical process, for instance, for correlating monitored user accounts with previously collected public identity leaks to identify possible compromised user accounts. In summary, we show that a SIEM system can indeed monitor a large enterprise environment with a massive load of incoming events. As a result, complex attacks spanning across the whole network can be uncovered and mitigated, which is an advancement in comparison to existing SIEM systems on the market.},
  language  = {en}
}
@article{JaegerGraupnerPelchenetal.2018,
  author    = {Jaeger, David and Graupner, Hendrik and Pelchen, Chris and Cheng, Feng and Meinel, Christoph},
  title     = {Fast Automated Processing and Evaluation of Identity Leaks},
  series = {International journal of parallel programming},
  volume    = {46},
  journal   = {International journal of parallel programming},
  number    = {2},
  publisher = {Springer},
  address   = {New York},
  issn      = {0885-7458},
  doi       = {10.1007/s10766-016-0478-6},
  pages     = {441 -- 470},
  year      = {2018},
  abstract  = {The relevance of identity data leaks on the Internet is more present than ever. Almost every week we read about leakage of databases with more than a million users in the news. Smaller but not less dangerous leaks happen even multiple times a day. The public availability of such leaked data is a major threat to the victims, but also creates the opportunity to learn not only about security of service providers but also the behavior of users when choosing passwords. Our goal is to analyze this data and generate knowledge that can be used to increase security awareness and security, respectively. This paper presents a novel approach to the processing and analysis of a vast majority of bigger and smaller leaks. We evolved from a semi-manual to a fully automated process that requires a minimum of human interaction. Our contribution is the concept and a prototype implementation of a leak processing workflow that includes the extraction of digital identities from structured and unstructured leak-files, the identification of hash routines and a quality control to ensure leak authenticity. By making use of parallel and distributed programming, we are able to make leaks almost immediately available for analysis and notification after they have been published. Based on the data collected, this paper reveals how easy it is for criminals to collect lots of passwords, which are plain text or only weakly hashed. We publish those results and hope to increase not only security awareness of Internet users but also security on a technical level on the service provider side.},
  language  = {en}
}