% Bibliography database: four entries, one field per line.
% Abstract text is stored verbatim as exported; only layout whitespace differs.

% Journal article (Internet research 33(6)).
% The former series field only repeated the journal name and was dropped.
@article{HagemannAbramova2023,
  author    = {Hagemann, Linus and Abramova, Olga},
  title     = {Sentiment, we-talk and engagement on social media},
  journal   = {Internet research},
  volume    = {33},
  number    = {6},
  publisher = {Emerald},
  address   = {Bingley},
  issn      = {1066-2243},
  doi       = {10.1108/INTR-12-2021-0885},
  pages     = {2058--2085},
  year      = {2023},
  abstract  = {Purpose Given inconsistent results in prior studies, this paper applies the dual process theory to investigate what social media messages yield audience engagement during a political event. It tests how affective cues (emotional valence, intensity and collective self-representation) and cognitive cues (insight, causation, certainty and discrepancy) contribute to public engagement. Design/methodology/approach The authors created a dataset of more than three million tweets during the 2020 United States (US) presidential elections. Affective and cognitive cues were assessed via sentiment analysis. The hypotheses were tested in negative binomial regressions. The authors also scrutinized a subsample of far-famed Twitter users. The final dataset, scraping code, preprocessing and analysis are available in an open repository. Findings The authors found the prominence of both affective and cognitive cues. For the overall sample, negativity bias was registered, and the tweet's emotionality was negatively related to engagement. In contrast, in the sub-sample of tweets from famous users, emotionally charged content produced higher engagement. The role of sentiment decreases when the number of followers grows and ultimately becomes insignificant for Twitter participants with many followers. Collective self-representation ("we-talk") is consistently associated with more likes, comments and retweets in the overall sample and subsamples. Originality/value The authors expand the dominating one-sided perspective to social media message processing focused on the peripheral route and hence affective cues. Leaning on the dual process theory, the authors shed light on the effectiveness of both affective (peripheral route) and cognitive (central route) cues on information appeal and dissemination on Twitter during a political event. The popularity of the tweet's author moderates these relationships.},
  language  = {en}
}

% Doctoral thesis.
% NOTE(review): pages = {xix, 180} looks like a page total (front matter plus
% body) rather than a page range -- confirm this is intended for the target style.
@phdthesis{Najafi2023,
  author   = {Najafi, Pejman},
  title    = {Leveraging data science \& engineering for advanced security operations},
  doi      = {10.25932/publishup-61225},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-612257},
  school   = {Universit{\"a}t Potsdam},
  pages    = {xix, 180},
  year     = {2023},
  abstract = {The Security Operations Center (SOC) represents a specialized unit responsible for managing security within enterprises. To aid in its responsibilities, the SOC relies heavily on a Security Information and Event Management (SIEM) system that functions as a centralized repository for all security-related data, providing a comprehensive view of the organization's security posture. Due to the ability to offer such insights, SIEMS are considered indispensable tools facilitating SOC functions, such as monitoring, threat detection, and incident response. Despite advancements in big data architectures and analytics, most SIEMs fall short of keeping pace. Architecturally, they function merely as log search engines, lacking the support for distributed large-scale analytics. Analytically, they rely on rule-based correlation, neglecting the adoption of more advanced data science and machine learning techniques. This thesis first proposes a blueprint for next-generation SIEM systems that emphasize distributed processing and multi-layered storage to enable data mining at a big data scale. Next, with the architectural support, it introduces two data mining approaches for advanced threat detection as part of SOC operations. First, a novel graph mining technique that formulates threat detection within the SIEM system as a large-scale graph mining and inference problem, built on the principles of guilt-by-association and exempt-by-reputation. The approach entails the construction of a Heterogeneous Information Network (HIN) that models shared characteristics and associations among entities extracted from SIEM-related events/logs. Thereon, a novel graph-based inference algorithm is used to infer a node's maliciousness score based on its associations with other entities in the HIN. Second, an innovative outlier detection technique that imitates a SOC analyst's reasoning process to find anomalies/outliers. The approach emphasizes explainability and simplicity, achieved by combining the output of simple context-aware univariate submodels that calculate an outlier score for each entry. Both approaches were tested in academic and real-world settings, demonstrating high performance when compared to other algorithms as well as practicality alongside a large enterprise's SIEM system. This thesis establishes the foundation for next-generation SIEM systems that can enhance today's SOCs and facilitate the transition from human-centric to data-driven security operations.},
  language = {en}
}

% Conference paper.
% The former series field only repeated booktitle and was dropped.
% NOTE(review): pages = {10} looks like a page count, not a page range --
% replace with the real first--last pages if they are known.
@inproceedings{AbramovaBatzelModesti2022,
  author    = {Abramova, Olga and Batzel, Katharina and Modesti, Daniela},
  title     = {Coping and regulatory responses on social media during health crisis},
  booktitle = {Proceedings of the 55th Hawaii International Conference on System Sciences},
  publisher = {HICSS Conference Office University of Hawaii at Manoa},
  address   = {Honolulu},
  isbn      = {978-0-9981331-5-7},
  pages     = {10},
  year      = {2022},
  abstract  = {During a crisis event, social media enables two-way communication and many-to-many information broadcasting, browsing others' posts, publishing own content, and public commenting. These records can deliver valuable insights to approach problematic situations effectively. Our study explores how social media communication can be analyzed to understand the responses to health crises better. Results based on nearly 800 K tweets indicate that the coping and regulation foci framework holds good explanatory power, with four clusters salient in public reactions: 1) "Understanding" (problem-promotion); 2) "Action planning" (problem-prevention); 3) "Hope" (emotion-promotion) and 4) "Reassurance" (emotion-prevention). Second, the inter-temporal analysis shows high volatility of topic proportions and a shift from self-centered to community-centered topics during the course of the event. The insights are beneficial for research on crisis management and practicians who are interested in large-scale monitoring of their audience for well-informed decision-making.},
  language  = {en}
}

% Chapter in an edited volume.
% The former series field only repeated booktitle and was dropped.
% The editor given name Rafa{\l} is written with a LaTeX escape, matching the
% escape convention used elsewhere in this file.
@incollection{RojahnAmbrosBiruetal.2023,
  author    = {Rojahn, Marcel and Ambros, Maximilian and Biru, Tibebu and Krallmann, Hermann and Gronau, Norbert and Grum, Marcus},
  title     = {Adequate basis for the data-driven and machine-learning-based identification},
  booktitle = {Artificial intelligence and soft computing},
  editor    = {Rutkowski, Leszek and Scherer, Rafa{\l} and Korytkowski, Marcin and Pedrycz, Witold and Tadeusiewicz, Ryszard and Zurada, Jacek M.},
  publisher = {Springer},
  address   = {Cham},
  isbn      = {978-3-031-42504-2},
  doi       = {10.1007/978-3-031-42505-9_48},
  pages     = {570--588},
  year      = {2023},
  abstract  = {Process mining (PM) has established itself in recent years as a main method for visualizing and analyzing processes. However, the identification of knowledge has not been addressed adequately because PM aims solely at data-driven discovering, monitoring, and improving real-world processes from event logs available in various information systems. The following paper, therefore, outlines a novel systematic analysis view on tools for data-driven and machine learning (ML)-based identification of knowledge-intensive target processes. To support the effectiveness of the identification process, the main contributions of this study are (1) to design a procedure for a systematic review and analysis for the selection of relevant dimensions, (2) to identify different categories of dimensions as evaluation metrics to select source systems, algorithms, and tools for PM and ML as well as include them in a multi-dimensional grid box model, (3) to select and assess the most relevant dimensions of the model, (4) to identify and assess source systems, algorithms, and tools in order to find evidence for the selected dimensions, and (5) to assess the relevance and applicability of the conceptualization and design procedure for tool selection in data-driven and ML-based process mining research.},
  language  = {en}
}