% Bibliography database: ten records (two working papers, five PhD theses,
% two article-type records and one book), all carrying a DOI and most a URN URL.
% Conventions used below: one field per line; non-ASCII characters are written
% as LaTeX escapes (for example {\"a}, {\ss}, $\tau$) so the file stays 7-bit
% clean for classic BibTeX; words in titles that must keep their capitalisation
% are wrapped in braces.

% Working paper, CEPA Discussion Papers no. 75.
% NOTE(review): techreport records normally require an `institution` field; none
% is present in this export. Presumably Universitaet Potsdam (same DOI prefix as
% the theses below) -- confirm before adding.
@techreport{Andres2024,
  author    = {Andres, Maximilian},
  title     = {Equilibrium selection in infinitely repeated games with communication},
  type      = {Working Paper},
  series    = {CEPA Discussion Papers},
  journal   = {CEPA Discussion Papers},
  number    = {75},
  issn      = {2628-653X},
  doi       = {10.25932/publishup-63180},
  url       = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-631800},
  pages     = {38},
  year      = {2024},
  abstract  = {The present paper proposes a novel approach for equilibrium selection in the infinitely repeated prisoner's dilemma where players can communicate before choosing their strategies. This approach yields a critical discount factor that makes different predictions for cooperation than the usually considered sub-game perfect or risk dominance critical discount factors. In laboratory experiments, we find that our factor is useful for predicting cooperation. For payoff changes where the usually considered factors and our factor make different predictions, the observed cooperation is consistent with the predictions based on our factor.},
  language  = {en},
}

% Working paper, CEPA Discussion Papers no. 77.
% NOTE(review): `institution` is missing here as well -- see the note on the
% previous record; confirm the value before adding.
@techreport{AndresBruttel2024,
  author    = {Andres, Maximilian and Bruttel, Lisa},
  title     = {Communicating Cartel Intentions},
  type      = {Working Paper},
  series    = {CEPA Discussion Papers},
  journal   = {CEPA Discussion Papers},
  number    = {77},
  issn      = {2628-653X},
  doi       = {10.25932/publishup-63846},
  url       = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-638469},
  pages     = {36},
  year      = {2024},
  abstract  = {While the economic harm of cartels is caused by their price-increasing effect, sanctioning by courts rather targets at the underlying process of firms reaching a price-fixing agreement. This paper provides experimental evidence on the question whether such sanctioning meets the economic target, i.e., whether evidence of a collusive meeting of the firms and of the content of their communication reliably predicts subsequent prices. We find that already the mere mutual agreement to meet predicts a strong increase in prices. Conversely, express distancing from communication completely nullifies its otherwise price-increasing effect. Using machine learning, we show that communication only increases prices if it is very explicit about how the cartel plans to behave.},
  language  = {en},
}

% PhD thesis (school field: Universitaet Potsdam).
@phdthesis{Lilienkamp2024,
  author    = {Lilienkamp, Henning},
  title     = {Enhanced computational approaches for data-driven characterization of earthquake ground motion and rapid earthquake impact assessment},
  school    = {Universit{\"a}t Potsdam},
  doi       = {10.25932/publishup-63195},
  url       = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-631954},
  pages     = {x, 145},
  year      = {2024},
  abstract  = {Rapidly growing seismic and macroseismic databases and simplified access to advanced machine learning methods have in recent years opened up vast opportunities to address challenges in engineering and strong motion seismology from novel, datacentric perspectives. In this thesis, I explore the opportunities of such perspectives for the tasks of ground motion modeling and rapid earthquake impact assessment, tasks with major implications for long-term earthquake disaster mitigation. In my first study, I utilize the rich strong motion database from the Kanto basin, Japan, and apply the U-Net artificial neural network architecture to develop a deep learning based ground motion model. The operational prototype provides statistical estimates of expected ground shaking, given descriptions of a specific earthquake source, wave propagation paths, and geophysical site conditions. The U-Net interprets ground motion data in its spatial context, potentially taking into account, for example, the geological properties in the vicinity of observation sites. Predictions of ground motion intensity are thereby calibrated to individual observation sites and earthquake locations. The second study addresses the explicit incorporation of rupture forward directivity into ground motion modeling. Incorporation of this phenomenon, causing strong, pulse like ground shaking in the vicinity of earthquake sources, is usually associated with an intolerable increase in computational demand during probabilistic seismic hazard analysis (PSHA) calculations. I suggest an approach in which I utilize an artificial neural network to efficiently approximate the average, directivity-related adjustment to ground motion predictions for earthquake ruptures from the 2022 New Zealand National Seismic Hazard Model. The practical implementation in an actual PSHA calculation demonstrates the efficiency and operational readiness of my model. In a follow-up study, I present a proof of concept for an alternative strategy in which I target the generalizing applicability to ruptures other than those from the New Zealand National Seismic Hazard Model. In the third study, I address the usability of pseudo-intensity reports obtained from macroseismic observations by non-expert citizens for rapid impact assessment. I demonstrate that the statistical properties of pseudo-intensity collections describing the intensity of shaking are correlated with the societal impact of earthquakes. In a second step, I develop a probabilistic model that, within minutes of an event, quantifies the probability of an earthquake to cause considerable societal impact. Under certain conditions, such a quick and preliminary method might be useful to support decision makers in their efforts to organize auxiliary measures for earthquake disaster response while results from more elaborate impact assessment frameworks are not yet available. The application of machine learning methods to datasets that only partially reveal characteristics of Big Data, qualify the majority of results obtained in this thesis as explorative insights rather than ready-to-use solutions to real world problems. The practical usefulness of this work will be better assessed in the future by applying the approaches developed to growing and increasingly complex data sets.},
  language  = {en},
}

% PhD thesis (school field: Universitaet Potsdam).
@phdthesis{Taleb2024,
  author    = {Taleb, Aiham},
  title     = {Self-supervised deep learning methods for medical image analysis},
  school    = {Universit{\"a}t Potsdam},
  doi       = {10.25932/publishup-64408},
  url       = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-644089},
  pages     = {xii, 171},
  year      = {2024},
  abstract  = {Deep learning has seen widespread application in many domains, mainly for its ability to learn data representations from raw input data. Nevertheless, its success has so far been coupled with the availability of large annotated (labelled) datasets. This is a requirement that is difficult to fulfil in several domains, such as in medical imaging. Annotation costs form a barrier in extending deep learning to clinically-relevant use cases. The labels associated with medical images are scarce, since the generation of expert annotations of multimodal patient data at scale is non-trivial, expensive, and time-consuming. This substantiates the need for algorithms that learn from the increasing amounts of unlabeled data. Self-supervised representation learning algorithms offer a pertinent solution, as they allow solving real-world (downstream) deep learning tasks with fewer annotations. Self-supervised approaches leverage unlabeled samples to acquire generic features about different concepts, enabling annotation-efficient downstream task solving subsequently. Nevertheless, medical images present multiple unique and inherent challenges for existing self-supervised learning approaches, which we seek to address in this thesis: (i) medical images are multimodal, and their multiple modalities are heterogeneous in nature and imbalanced in quantities, e.g. MRI and CT; (ii) medical scans are multi-dimensional, often in 3D instead of 2D; (iii) disease patterns in medical scans are numerous and their incidence exhibits a long-tail distribution, so it is oftentimes essential to fuse knowledge from different data modalities, e.g. genomics or clinical data, to capture disease traits more comprehensively; (iv) Medical scans usually exhibit more uniform color density distributions, e.g. in dental X-Rays, than natural images. Our proposed self-supervised methods meet these challenges, besides significantly reducing the amounts of required annotations. We evaluate our self-supervised methods on a wide array of medical imaging applications and tasks. Our experimental results demonstrate the obtained gains in both annotation-efficiency and performance; our proposed methods outperform many approaches from related literature. Additionally, in case of fusion with genetic modalities, our methods also allow for cross-modal interpretability. In this thesis, not only we show that self-supervised learning is capable of mitigating manual annotation costs, but also our proposed solutions demonstrate how to better utilize it in the medical imaging domain. Progress in self-supervised learning has the potential to extend deep learning algorithms application to clinical scenarios.},
  language  = {en},
}

% Journal article; "PyFin" is brace-protected so sentence-casing styles keep
% its capitalisation.
@article{WilkschAbramova2023,
  author    = {Wilksch, Moritz and Abramova, Olga},
  title     = {{PyFin}-sentiment},
  journal   = {International journal of information management data insights},
  series    = {International journal of information management data insights},
  volume    = {3},
  number    = {1},
  publisher = {Elsevier},
  address   = {Amsterdam},
  issn      = {2667-0968},
  doi       = {10.1016/j.jjimei.2023.100171},
  pages     = {10},
  year      = {2023},
  abstract  = {Responding to the poor performance of generic automated sentiment analysis solutions on domain-specific texts, we collect a dataset of 10,000 tweets discussing the topics of finance and investing. We manually assign each tweet its market sentiment, i.e., the investor's anticipation of a stock's future return. Using this data, we show that all existing sentiment models trained on adjacent domains struggle with accurate market sentiment analysis due to the task's specialized vocabulary. Consequently, we design, train, and deploy our own sentiment model. It outperforms all previous models (VADER, NTUSD-Fin, FinBERT, TwitterRoBERTa) when evaluated on Twitter posts. On posts from a different platform, our model performs on par with BERT-based large language models. We achieve this result at a fraction of the training and inference costs due to the model's simple design. We publish the artifact as a python library to facilitate its use by future researchers and practitioners.},
  language  = {en},
}

% PhD thesis, German-language record. The German noun in the title is
% brace-protected against lowercasing; sharp s is written as {\ss}.
@phdthesis{Mientus2023,
  author    = {Mientus, Lukas},
  title     = {Reflexion und {Reflexivit{\"a}t}},
  school    = {Universit{\"a}t Potsdam},
  doi       = {10.25932/publishup-61000},
  url       = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-610003},
  pages     = {121},
  year      = {2023},
  abstract  = {Reflexion gilt in der Lehrkr{\"a}ftebildung als eine Schl{\"u}sselkategorie der professionellen Entwicklung. Entsprechend wird auf vielf{\"a}ltige Weise die Qualit{\"a}t reflexionsbezogener Kompetenzen untersucht. Eine Herausforderung hierbei kann in der Annahme bestehen, von der Analyse schriftlicher Reflexionen unmittelbar auf die Reflexivit{\"a}t einer Person zu schlie{\ss}en, da Reflexion stets kontextspezifisch als Abbild reflexionsbezogener Argumentationsprozesse angesehen werden sollte und reflexionsbezogenen Dispositionen unterliegt. Auch kann die Qualit{\"a}t einer Reflexion auf mehreren Dimensionen bewertet werden, ohne quantifizierbare, absolute Aussagen treffen zu k{\"o}nnen. Daher wurden im Rahmen einer Physik-Videovignette N = 134 schriftliche Fremdreflexionen verfasst und kontextspezifische reflexionsbezogene Dispositionen erhoben. Expert*innen erstellten theoriegeleitet Qualit{\"a}tsbewertungen zur Breite, Tiefe, Koh{\"a}renz und Spezifit{\"a}t eines jeden Reflexionstextes. Unter Verwendung computerbasierter Klassifikations- und Analyseverfahren wurden weitere Textmerkmale erhoben. Mittels explorativer Faktorenanalyse konnten die Faktoren Qualit{\"a}t, Quantit{\"a}t und Deskriptivit{\"a}t gefunden werden. Da alle konventionell eingesch{\"a}tzten Qualit{\"a}tsbewertungen durch einen Faktor repr{\"a}sentiert wurden, konnte ein maximales Qualit{\"a}tskorrelat kalkuliert werden, zu welchem jede schriftliche Fremdreflexion im Rahmen der vorliegenden Vignette eine computerbasiert bestimmbare Distanz aufweist. Diese Distanz zum maximalen Qualit{\"a}tskorrelat konnte validiert werden und kann die Qualit{\"a}t der schriftlichen Reflexionen unabh{\"a}ngig von menschlichen Ressourcen quantifiziert repr{\"a}sentieren. Abschlie{\ss}end konnte identifiziert werden, dass ausgew{\"a}hlte Dispositionen in unterschiedlichem Ma{\ss}e mit der Reflexionsqualit{\"a}t zusammenh{\"a}ngen. So konnten beispielsweise bezogen auf das Physik-Fachwissen minimale Zusammenh{\"a}nge identifiziert werden, wohingegen Werthaltung sowie wahrgenommene Unterrichtsqualit{\"a}t eng mit der Qualit{\"a}t einer schriftlichen Reflexion in Verbindung stehen k{\"o}nnen. Es wird geschlussfolgert, dass reflexionsbezogene Dispositionen moderierenden Einfluss auf Reflexionen nehmen k{\"o}nnen. Es wird empfohlen bei der Erhebung von Reflexion mit dem Ziel der Kompetenzmessung ausgew{\"a}hlte Dispositionen mit zu erheben. Weiter verdeutlicht diese Arbeit die M{\"o}glichkeit, aussagekr{\"a}ftige Quantifizierungen auch in der Analyse komplexer Konstrukte vorzunehmen. Durch computerbasierte Qualit{\"a}tsabsch{\"a}tzungen k{\"o}nnen objektive und individuelle Analysen und differenzierteres automatisiertes Feedback erm{\"o}glicht werden.},
  language  = {de},
}

% Contribution with a page range, stored as an article record. German nouns
% and the acronym KI in the title are brace-protected against lowercasing.
% NOTE(review): the container carries an ISBN, a publisher and a series-like
% suffix, so this looks like a chapter in an edited volume; consider an
% incollection record with `booktitle` -- confirm against the publication.
@article{WulffMientusNowaketal.2023,
  author    = {Wulff, Peter and Mientus, Lukas and Nowak, Anna and Borowski, Andreas},
  title     = {{KI}-basierte {Auswertung} von schriftlichen {Unterrichtsreflexionen} im {Fach} {Physik} und automatisierte {R{\"u}ckmeldung}},
  journal   = {PSI-Potsdam: Ergebnisbericht zu den Aktivit{\"a}ten im Rahmen der Qualit{\"a}tsoffensive Lehrerbildung (2019-2023) (Potsdamer Beitr{\"a}ge zur Lehrerbildung und Bildungsforschung ; 3)},
  series    = {PSI-Potsdam: Ergebnisbericht zu den Aktivit{\"a}ten im Rahmen der Qualit{\"a}tsoffensive Lehrerbildung (2019-2023) (Potsdamer Beitr{\"a}ge zur Lehrerbildung und Bildungsforschung ; 3)},
  number    = {3},
  publisher = {Universit{\"a}tsverlag Potsdam},
  address   = {Potsdam},
  isbn      = {978-3-86956-568-2},
  issn      = {2626-3556},
  doi       = {10.25932/publishup-61636},
  url       = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-616363},
  pages     = {103--115},
  year      = {2023},
  abstract  = {F{\"u}r die Entwicklung professioneller Handlungskompetenzen angehender Lehrkr{\"a}fte stellt die Unterrichtsreflexion ein wichtiges Instrument dar, um Theoriewissen und Praxiserfahrungen in Beziehung zu setzen. Die Auswertung von Unterrichtsreflexionen und eine entsprechende R{\"u}ckmeldung stellt Forschende und Dozierende allerdings vor praktische wie theoretische Herausforderungen. Im Kontext der Forschung zu K{\"u}nstlicher Intelligenz (KI) entwickelte Methoden bieten hier neue Potenziale. Der Beitrag stellt {\"u}berblicksartig zwei Teilstudien vor, die mit Hilfe von KI-Methoden wie dem maschinellen Lernen untersuchen, inwieweit eine Auswertung von Unterrichtsreflexionen angehender Physiklehrkr{\"a}fte auf Basis eines theoretisch abgeleiteten Reflexionsmodells und die automatisierte R{\"u}ckmeldung hierzu m{\"o}glich sind. Dabei wurden unterschiedliche Ans{\"a}tze des maschinellen Lernens verwendet, um modellbasierte Klassifikation und Exploration von Themen in Unterrichtsreflexionen umzusetzen. Die Genauigkeit der Ergebnisse wurde vor allem durch sog. Gro{\ss}e Sprachmodelle gesteigert, die auch den Transfer auf andere Standorte und F{\"a}cher erm{\"o}glichen. F{\"u}r die fachdidaktische Forschung bedeuten sie jedoch wiederum neue Herausforderungen, wie etwa systematische Verzerrungen und Intransparenz von Entscheidungen. Dennoch empfehlen wir, die Potenziale der KI-basierten Methoden gr{\"u}ndlicher zu erforschen und konsequent in der Praxis (etwa in Form von Webanwendungen) zu implementieren.},
  language  = {de},
}

% Book record. A field name may appear only once per record, so a single
% `publisher` is given: the university press, which is the value listed next
% to `address` and `isbn` in the export.
% NOTE(review): `number` is set but there is no `series`; the series name is
% not available in this export -- confirm and add it.
@book{Weber2023,
  author    = {Weber, Benedikt},
  title     = {Human pose estimation for decubitus prophylaxis},
  number    = {153},
  publisher = {Universit{\"a}tsverlag Potsdam},
  address   = {Potsdam},
  isbn      = {978-3-86956-551-4},
  issn      = {1613-5652},
  doi       = {10.25932/publishup-56719},
  url       = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-567196},
  pages     = {73},
  year      = {2023},
  abstract  = {Decubitus is one of the most relevant diseases in nursing and the most expensive to treat. It is caused by sustained pressure on tissue, so it particularly affects bed-bound patients. This work lays a foundation for pressure mattress-based decubitus prophylaxis by implementing a solution to the single-frame 2D Human Pose Estimation problem. For this, methods of Deep Learning are employed. Two approaches are examined, a coarse-to-fine Convolutional Neural Network for direct regression of joint coordinates and a U-Net for the derivation of probability distribution heatmaps. We conclude that training our models on a combined dataset of the publicly available Bodies at Rest and SLP data yields the best results. Furthermore, various preprocessing techniques are investigated, and a hyperparameter optimization is performed to discover an improved model architecture. Another finding indicates that the heatmap-based approach outperforms direct regression. This model achieves a mean per-joint position error of 9.11 cm for the Bodies at Rest data and 7.43 cm for the SLP data. We find that it generalizes well on data from mattresses other than those seen during training but has difficulties detecting the arms correctly. Additionally, we give a brief overview of the medical data annotation tool annoto we developed in the bachelor project and furthermore conclude that the Scrum framework and agile practices enhanced our development workflow.},
  language  = {en},
}

% PhD thesis (school field: Universitaet Potsdam). The ampersand in the title
% is escaped for LaTeX.
@phdthesis{Najafi2023,
  author    = {Najafi, Pejman},
  title     = {Leveraging data science \& engineering for advanced security operations},
  school    = {Universit{\"a}t Potsdam},
  doi       = {10.25932/publishup-61225},
  url       = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-612257},
  pages     = {xix, 180},
  year      = {2023},
  abstract  = {The Security Operations Center (SOC) represents a specialized unit responsible for managing security within enterprises. To aid in its responsibilities, the SOC relies heavily on a Security Information and Event Management (SIEM) system that functions as a centralized repository for all security-related data, providing a comprehensive view of the organization's security posture. Due to the ability to offer such insights, SIEMS are considered indispensable tools facilitating SOC functions, such as monitoring, threat detection, and incident response. Despite advancements in big data architectures and analytics, most SIEMs fall short of keeping pace. Architecturally, they function merely as log search engines, lacking the support for distributed large-scale analytics. Analytically, they rely on rule-based correlation, neglecting the adoption of more advanced data science and machine learning techniques. This thesis first proposes a blueprint for next-generation SIEM systems that emphasize distributed processing and multi-layered storage to enable data mining at a big data scale. Next, with the architectural support, it introduces two data mining approaches for advanced threat detection as part of SOC operations. First, a novel graph mining technique that formulates threat detection within the SIEM system as a large-scale graph mining and inference problem, built on the principles of guilt-by-association and exempt-by-reputation. The approach entails the construction of a Heterogeneous Information Network (HIN) that models shared characteristics and associations among entities extracted from SIEM-related events/logs. Thereon, a novel graph-based inference algorithm is used to infer a node's maliciousness score based on its associations with other entities in the HIN. Second, an innovative outlier detection technique that imitates a SOC analyst's reasoning process to find anomalies/outliers. The approach emphasizes explainability and simplicity, achieved by combining the output of simple context-aware univariate submodels that calculate an outlier score for each entry. Both approaches were tested in academic and real-world settings, demonstrating high performance when compared to other algorithms as well as practicality alongside a large enterprise's SIEM system. This thesis establishes the foundation for next-generation SIEM systems that can enhance today's SOCs and facilitate the transition from human-centric to data-driven security operations.},
  language  = {en},
}

% PhD thesis (school field: Universitaet Potsdam). The Greek letter tau in the
% abstract is written in math mode.
@phdthesis{Seleem2023,
  author    = {Seleem, Omar},
  title     = {Towards urban pluvial flood mapping using data-driven models},
  school    = {Universit{\"a}t Potsdam},
  doi       = {10.25932/publishup-59813},
  url       = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-598137},
  pages     = {xv, 95},
  year      = {2023},
  abstract  = {Casualties and damages from urban pluvial flooding are increasing. Triggered by short, localized, and intensive rainfall events, urban pluvial floods can occur anywhere, even in areas without a history of flooding. Urban pluvial floods have relatively small temporal and spatial scales. Although cumulative losses from urban pluvial floods are comparable, most flood risk management and mitigation strategies focus on fluvial and coastal flooding. Numerical-physical-hydrodynamic models are considered the best tool to represent the complex nature of urban pluvial floods; however, they are computationally expensive and time-consuming. These sophisticated models make large-scale analysis and operational forecasting prohibitive. Therefore, it is crucial to evaluate and benchmark the performance of other alternative methods. The findings of this cumulative thesis are represented in three research articles. The first study evaluates two topographic-based methods to map urban pluvial flooding, fill-spill-merge (FSM) and topographic wetness index (TWI), by comparing them against a sophisticated hydrodynamic model. The FSM method identifies flood-prone areas within topographic depressions while the TWI method employs maximum likelihood estimation to calibrate a TWI threshold ($\tau$) based on inundation maps from the 2D hydrodynamic model. The results point out that the FSM method outperforms the TWI method. The study highlights then the advantage and limitations of both methods. Data-driven models provide a promising alternative to computationally expensive hydrodynamic models. However, the literature lacks benchmarking studies to evaluate the different models' performance, advantages and limitations. Model transferability in space is a crucial problem. Most studies focus on river flooding, likely due to the relative availability of flow and rain gauge records for training and validation. Furthermore, they consider these models as black boxes. The second study uses a flood inventory for the city of Berlin and 11 predictive features which potentially indicate an increased pluvial flooding hazard to map urban pluvial flood susceptibility using a convolutional neural network (CNN), an artificial neural network (ANN) and the benchmarking machine learning models random forest (RF) and support vector machine (SVM). I investigate the influence of spatial resolution on the implemented models, the models' transferability in space and the importance of the predictive features. The results show that all models perform well and the RF models are superior to the other models within and outside the training domain. The models developed using fine spatial resolution (2 and 5 m) could better identify flood-prone areas. Finally, the results point out that aspect is the most important predictive feature for the CNN models, and altitude is for the other models. While flood susceptibility maps identify flood-prone areas, they do not represent flood variables such as velocity and depth which are necessary for effective flood risk management. To address this, the third study investigates data-driven models' transferability to predict urban pluvial floodwater depth and the models' ability to enhance their predictions using transfer learning techniques. It compares the performance of RF (the best-performing model in the previous study) and CNN models using 12 predictive features and output from a hydrodynamic model. The findings in the third study suggest that while CNN models tend to generalise and smooth the target function on the training dataset, RF models suffer from overfitting. Hence, RF models are superior for predictions inside the training domains but fail outside them while CNN models could control the relative loss in performance outside the training domains. Finally, the CNN models benefit more from transfer learning techniques than RF models, boosting their performance outside training domains. In conclusion, this thesis has evaluated both topographic-based methods and data-driven models to map urban pluvial flooding. However, further studies are crucial to have methods that completely overcome the limitation of 2D hydrodynamic models.},
  language  = {en},
}