% Journal article: reinforcement-learning-based co-location scheduling prototypes Mary, Hugo, and Hugo*.
@article{ThamsenBeilharzVinhThuyTranetal.2020,
  author    = {Thamsen, Lauritz and Beilharz, Jossekin Jakob and Tran, Vinh Thuy and Nedelkoski, Sasho and Kao, Odej},
  title     = {{Mary}, {Hugo}, and {Hugo*}},
  journal   = {Concurrency and computation : practice \& experience},
  volume    = {33},
  number    = {18},
  publisher = {Wiley},
  address   = {Hoboken},
  issn      = {1532-0626},
  doi       = {10.1002/cpe.5823},
  pages     = {12},
  year      = {2020},
  abstract  = {Distributed data-parallel processing systems like MapReduce, Spark, and Flink are popular for analyzing large datasets using cluster resources. Resource management systems like YARN or Mesos in turn allow multiple data-parallel processing jobs to share cluster resources in temporary containers. Often, the containers do not isolate resource usage to achieve high degrees of overall resource utilization despite overprovisioning and the often fluctuating utilization of specific jobs. However, some combinations of jobs utilize resources better and interfere less with each other when running on the same shared nodes than others. This article presents an approach for improving the resource utilization and job throughput when scheduling recurring distributed data-parallel processing jobs in shared clusters. The approach is based on reinforcement learning and a measure of co-location goodness to have cluster schedulers learn over time which jobs are best executed together on shared resources. We evaluated this approach over the last years with three prototype schedulers that build on each other: Mary, Hugo, and Hugo*. For the evaluation we used exemplary Flink and Spark jobs from different application domains and clusters of commodity nodes managed by YARN. The results of these experiments show that our approach can increase resource utilization and job throughput significantly.},
  language  = {en}
}
% PhD thesis (Universitaet Potsdam): multi-agent reinforcement learning for distributed decision-making.
@phdthesis{Tan2023,
  author    = {Tan, Jing},
  title     = {Multi-Agent Reinforcement Learning for Interactive Decision-Making},
  school    = {Universit{\"a}t Potsdam},
  year      = {2023},
  pages     = {xii, 135},
  doi       = {10.25932/publishup-60700},
  url       = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-607000},
  language  = {en},
  abstract  = {Distributed decision-making studies the choices made among a group of interactive and self-interested agents. Specifically, this thesis is concerned with the optimal sequence of choices an agent makes as it tries to maximize its achievement on one or multiple objectives in the dynamic environment. The optimization of distributed decision-making is important in many real-life applications, e.g., resource allocation (of products, energy, bandwidth, computing power, etc.) and robotics (heterogeneous agent cooperation on games or tasks), in various fields such as vehicular network, Internet of Things, smart grid, etc. This thesis proposes three multi-agent reinforcement learning algorithms combined with game-theoretic tools to study strategic interaction between decision makers, using resource allocation in vehicular network as an example. Specifically, the thesis designs an interaction mechanism based on second-price auction, incentivizes the agents to maximize multiple short-term and long-term, individual and system objectives, and simulates a dynamic environment with realistic mobility data to evaluate algorithm performance and study agent behavior. Theoretical results show that the mechanism has Nash equilibria, is a maximization of social welfare and Pareto optimal allocation of resources in a stationary environment. Empirical results show that in the dynamic environment, our proposed learning algorithms outperform state-of-the-art algorithms in single and multi-objective optimization, and demonstrate very good generalization property in significantly different environments. Specifically, with the long-term multi-objective learning algorithm, we demonstrate that by considering the long-term impact of decisions, as well as by incentivizing the agents with a system fairness reward, the agents achieve better results in both individual and system objectives, even when their objectives are private, randomized, and changing over time. 
Moreover, the agents show competitive behavior to maximize individual payoff when resource is scarce, and cooperative behavior in achieving a system objective when resource is abundant; they also learn the rules of the game, without prior knowledge, to overcome disadvantages in initial parameters (e.g., a lower budget). To address practicality concerns, the thesis also provides several computational performance improvement methods, and tests the algorithm in a single-board computer. Results show the feasibility of online training and inference in milliseconds. There are many potential future topics following this work. 1) The interaction mechanism can be modified into a double-auction, eliminating the auctioneer, resembling a completely distributed, ad hoc network; 2) the objectives are assumed to be independent in this thesis, there may be a more realistic assumption regarding correlation between objectives, such as a hierarchy of objectives; 3) current work limits information-sharing between agents, the setup befits applications with privacy requirements or sparse signaling; by allowing more information-sharing between the agents, the algorithms can be modified for more cooperative scenarios such as robotics.}
}
% Journal article: overview of deep reinforcement learning applications in production systems.
@article{PanzerBender2021,
  author    = {Panzer, Marcel and Bender, Benedict},
  title     = {Deep reinforcement learning in production systems},
  journal   = {International Journal of Production Research},
  volume    = {60},
  number    = {13},
  publisher = {Taylor \& Francis},
  address   = {London},
  issn      = {1366-588X},
  doi       = {10.1080/00207543.2021.1973138},
  year      = {2021},
  abstract  = {Shortening product development cycles and fully customizable products pose major challenges for production systems. These not only have to cope with an increased product diversity but also enable high throughputs and provide a high adaptability and robustness to process variations and unforeseen incidents. To overcome these challenges, deep Reinforcement Learning (RL) has been increasingly applied for the optimization of production systems. Unlike other machine learning methods, deep RL operates on recently collected sensor-data in direct interaction with its environment and enables real-time responses to system changes. Although deep RL is already being deployed in production systems, a systematic review of the results has not yet been established. The main contribution of this paper is to provide researchers and practitioners an overview of applications and to motivate further implementations and research of deep RL supported production systems. Findings reveal that deep RL is applied in a variety of production domains, contributing to data-driven and flexible processes. In most applications, conventional methods were outperformed and implementation efforts or dependence on human experience were reduced. Nevertheless, future research must focus more on transferring the findings to real-world systems to analyze safety aspects and demonstrate reliability under prevailing conditions.},
  language  = {en}
}
% PhD thesis (Universitaet Potsdam): deep-RL-based hyper-heuristic control framework for modular production systems.
@phdthesis{Panzer2024,
  author    = {Panzer, Marcel},
  title     = {Design of a hyper-heuristics based control framework for modular production systems},
  doi       = {10.25932/publishup-63300},
  url       = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-633006},
  school    = {Universit{\"a}t Potsdam},
  pages     = {vi, 334},
  year      = {2024},
  abstract  = {Volatile supply and sales markets, coupled with increasing product individualization and complex production processes, present significant challenges for manufacturing companies. These must navigate and adapt to ever-shifting external and internal factors while ensuring robustness against process variabilities and unforeseen events. This has a pronounced impact on production control, which serves as the operational intersection between production planning and the shop-floor resources, and necessitates the capability to manage intricate process interdependencies effectively. Considering the increasing dynamics and product diversification, alongside the need to maintain constant production performances, the implementation of innovative control strategies becomes crucial. In recent years, the integration of Industry 4.0 technologies and machine learning methods has gained prominence in addressing emerging challenges in production applications. Within this context, this cumulative thesis analyzes deep learning based production systems based on five publications. Particular attention is paid to the applications of deep reinforcement learning, aiming to explore its potential in dynamic control contexts. Analysis reveal that deep reinforcement learning excels in various applications, especially in dynamic production control tasks. Its efficacy can be attributed to its interactive learning and real-time operational model. However, despite its evident utility, there are notable structural, organizational, and algorithmic gaps in the prevailing research. A predominant portion of deep reinforcement learning based approaches is limited to specific job shop scenarios and often overlooks the potential synergies in combined resources. Furthermore, it highlights the rare implementation of multi-agent systems and semi-heterarchical systems in practical settings. A notable gap remains in the integration of deep reinforcement learning into a hyper-heuristic. 
To bridge these research gaps, this thesis introduces a deep reinforcement learning based hyper-heuristic for the control of modular production systems, developed in accordance with the design science research methodology. Implemented within a semi-heterarchical multi-agent framework, this approach achieves a threefold reduction in control and optimisation complexity while ensuring high scalability, adaptability, and robustness of the system. In comparative benchmarks, this control methodology outperforms rule-based heuristics, reducing throughput times and tardiness, and effectively incorporates customer and order-centric metrics. The control artifact facilitates a rapid scenario generation, motivating for further research efforts and bridging the gap to real-world applications. The overarching goal is to foster a synergy between theoretical insights and practical solutions, thereby enriching scientific discourse and addressing current industrial challenges.},
  language  = {en}
}
% Journal article: goal-directed vs. habitual control and alcohol consumption in 18-year-old social drinkers.
@article{NebeKroemerSchadetal.2017,
  author    = {Nebe, Stephan and Kroemer, Nils B. and Schad, Daniel and Bernhardt, Nadine and Sebold, Miriam Hannah and Mueller, Dirk K. and Scholl, Lucie and Kuitunen-Paul, S{\"o}ren and Heinz, Andreas and Rapp, Michael Armin and Huys, Quentin J. M. and Smolka, Michael N.},
  title     = {No association of goal-directed and habitual control with alcohol consumption in young adults},
  journal   = {Addiction biology},
  volume    = {23},
  number    = {1},
  publisher = {Wiley},
  address   = {Hoboken},
  issn      = {1355-6215},
  doi       = {10.1111/adb.12490},
  pages     = {379--393},
  year      = {2017},
  abstract  = {Alcohol dependence is a mental disorder that has been associated with an imbalance in behavioral control favoring model-free habitual over model-based goal-directed strategies. It is as yet unknown, however, whether such an imbalance reflects a predisposing vulnerability or results as a consequence of repeated and/or excessive alcohol exposure. We, therefore, examined the association of alcohol consumption with model-based goal-directed and model-free habitual control in 188 18-year-old social drinkers in a two-step sequential decision-making task while undergoing functional magnetic resonance imaging before prolonged alcohol misuse could have led to severe neurobiological adaptations. Behaviorally, participants showed a mixture of model-free and model-based decision-making as observed previously. Measures of impulsivity were positively related to alcohol consumption. In contrast, neither model-free nor model-based decision weights nor the trade-off between them were associated with alcohol consumption. There were also no significant associations between alcohol consumption and neural correlates of model-free or model-based decision quantities in either ventral striatum or ventromedial prefrontal cortex. Exploratory whole-brain functional magnetic resonance imaging analyses with a lenient threshold revealed early onset of drinking to be associated with an enhanced representation of model-free reward prediction errors in the posterior putamen. These results suggest that an imbalance between model-based goal-directed and model-free habitual control might rather not be a trait marker of alcohol intake per se.},
  language  = {en}
}
% PhD thesis (Universitaet Potsdam): Bayesian data assimilation and reinforcement learning for model-informed precision dosing.
@phdthesis{Maier2021,
  author    = {Maier, Corinna},
  title     = {{Bayesian} data assimilation and reinforcement learning for model-informed precision dosing in oncology},
  doi       = {10.25932/publishup-51587},
  url       = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-515870},
  school    = {Universit{\"a}t Potsdam},
  pages     = {x, 138},
  year      = {2021},
  abstract  = {While patients are known to respond differently to drug therapies, current clinical practice often still follows a standardized dosage regimen for all patients. For drugs with a narrow range of both effective and safe concentrations, this approach may lead to a high incidence of adverse events or subtherapeutic dosing in the presence of high patient variability. Model-informed precision dosing (MIPD) is a quantitative approach towards dose individualization based on mathematical modeling of dose-response relationships integrating therapeutic drug/biomarker monitoring (TDM) data. MIPD may considerably improve the efficacy and safety of many drug therapies. Current MIPD approaches, however, rely either on pre-calculated dosing tables or on simple point predictions of the therapy outcome. These approaches lack a quantification of uncertainties and the ability to account for effects that are delayed. In addition, the underlying models are not improved while applied to patient data. Therefore, current approaches are not well suited for informed clinical decision-making based on a differentiated understanding of the individually predicted therapy outcome. The objective of this thesis is to develop mathematical approaches for MIPD, which (i) provide efficient fully Bayesian forecasting of the individual therapy outcome including associated uncertainties, (ii) integrate Markov decision processes via reinforcement learning (RL) for a comprehensive decision framework for dose individualization, (iii) allow for continuous learning across patients and hospitals. Cytotoxic anticancer chemotherapy with its major dose-limiting toxicity, neutropenia, serves as a therapeutically relevant application example. For more comprehensive therapy forecasting, we apply Bayesian data assimilation (DA) approaches, integrating patient-specific TDM data into mathematical models of chemotherapy-induced neutropenia that build on prior population analyses. 
The value of uncertainty quantification is demonstrated as it allows reliable computation of the patient-specific probabilities of relevant clinical quantities, e.g., the neutropenia grade. In view of novel home monitoring devices that increase the amount of TDM data available, the data processing of sequential DA methods proves to be more efficient and facilitates handling of the variability between dosing events. By transferring concepts from DA and RL we develop novel approaches for MIPD. While DA-guided dosing integrates individualized uncertainties into dose selection, RL-guided dosing provides a framework to consider delayed effects of dose selections. The combined DA-RL approach takes into account both aspects simultaneously and thus represents a holistic approach towards MIPD. Additionally, we show that RL can be used to gain insights into important patient characteristics for dose selection. The novel dosing strategies substantially reduce the occurrence of both subtherapeutic and life-threatening neutropenia grades in a simulation study based on a recent clinical study (CEPAC-TDM trial) compared to currently used MIPD approaches. If MIPD is to be implemented in routine clinical practice, a certain model bias with respect to the underlying model is inevitable, as the models are typically based on data from comparably small clinical trials that reflect only to a limited extent the diversity in real-world patient populations. We propose a sequential hierarchical Bayesian inference framework that enables continuous cross-patient learning to learn the underlying model parameters of the target patient population. It is important to note that the approach only requires summary information of the individual patient data to update the model. This separation of the individual inference from population inference enables implementation across different centers of care. 
The proposed approaches substantially improve current MIPD approaches, taking into account new trends in health care and aspects of practical applicability. They enable progress towards more informed clinical decision-making, ultimately increasing patient benefits beyond the current practice.},
  language  = {en}
}
% PhD thesis (Universitaet Potsdam): unsupervised database optimization (index selection, data dependencies).
@phdthesis{Kossmann2023,
  author    = {Ko{\ss}mann, Jan},
  title     = {Unsupervised database optimization},
  doi       = {10.25932/publishup-58949},
  url       = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-589490},
  school    = {Universit{\"a}t Potsdam},
  pages     = {xi, 203},
  year      = {2023},
  abstract  = {The amount of data stored in databases and the complexity of database workloads are ever-increasing. Database management systems (DBMSs) offer many configuration options, such as index creation or unique constraints, which must be adapted to the specific instance to efficiently process large volumes of data. Currently, such database optimization is complicated, manual work performed by highly skilled database administrators (DBAs). In cloud scenarios, manual database optimization even becomes infeasible: it exceeds the abilities of the best DBAs due to the enormous number of deployed DBMS instances (some providers maintain millions of instances), missing domain knowledge resulting from data privacy requirements, and the complexity of the configuration tasks. Therefore, we investigate how to automate the configuration of DBMSs efficiently with the help of unsupervised database optimization. While there are numerous configuration options, in this thesis, we focus on automatic index selection and the use of data dependencies, such as functional dependencies, for query optimization. Both aspects have an extensive performance impact and complement each other by approaching unsupervised database optimization from different perspectives. Our contributions are as follows: (1) we survey automated state-of-the-art index selection algorithms regarding various criteria, e.g., their support for index interaction. We contribute an extensible platform for evaluating the performance of such algorithms with industry-standard datasets and workloads. The platform is well-received by the community and has led to follow-up research. With our platform, we derive the strengths and weaknesses of the investigated algorithms. We conclude that existing solutions often have scalability issues and cannot quickly determine (near-)optimal solutions for large problem instances. (2) To overcome these limitations, we present two new algorithms. 
Extend determines (near-)optimal solutions with an iterative heuristic. It identifies the best index configurations for the evaluated benchmarks. Its selection runtimes are up to 10 times lower compared with other near-optimal approaches. SWIRL is based on reinforcement learning and delivers solutions instantly. These solutions perform within 3 \% of the optimal ones. Extend and SWIRL are available as open-source implementations. (3) Our index selection efforts are complemented by a mechanism that analyzes workloads to determine data dependencies for query optimization in an unsupervised fashion. We describe and classify 58 query optimization techniques based on functional, order, and inclusion dependencies as well as on unique column combinations. The unsupervised mechanism and three optimization techniques are implemented in our open-source research DBMS Hyrise. Our approach reduces the Join Order Benchmark's runtime by 26 \% and accelerates some TPC-DS queries by up to 58 times. Additionally, we have developed a cockpit for unsupervised database optimization that allows interactive experiments to build confidence in such automated techniques. In summary, our contributions improve the performance of DBMSs, support DBAs in their work, and enable them to contribute their time to other, less arduous tasks.},
  language  = {en}
}
% Postprint in the Universitaet Potsdam "Humanwissenschaftliche Reihe" series (no. 621): life stress, neural learning signals, fluid IQ.
@misc{FriedelSchlagenhaufBecketal.2014,
  author    = {Friedel, Eva and Schlagenhauf, Florian and Beck, Anne and Dolan, Raymond J. and Huys, Quentin J. M. and Rapp, Michael Armin and Heinz, Andreas},
  title     = {The effects of life stress and neural learning signals on fluid intelligence},
  series    = {Postprints der Universit{\"a}t Potsdam : Humanwissenschaftliche Reihe},
  journal   = {Postprints der Universit{\"a}t Potsdam : Humanwissenschaftliche Reihe},
  number    = {621},
  issn      = {1866-8372},
  doi       = {10.25932/publishup-43514},
  url       = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-435140},
  pages     = {35--43},
  year      = {2014},
  abstract  = {Fluid intelligence (fluid IQ), defined as the capacity for rapid problem solving and behavioral adaptation, is known to be modulated by learning and experience. Both stressful life events (SLES) and neural correlates of learning [specifically, a key mediator of adaptive learning in the brain, namely the ventral striatal representation of prediction errors (PE)] have been shown to be associated with individual differences in fluid IQ. Here, we examine the interaction between adaptive learning signals (using a well-characterized probabilistic reversal learning task in combination with fMRI) and SLES on fluid IQ measures. We find that the correlation between ventral striatal BOLD PE and fluid IQ, which we have previously reported, is quantitatively modulated by the amount of reported SLES. Thus, after experiencing adversity, basic neuronal learning signatures appear to align more closely with a general measure of flexible learning (fluid IQ), a finding complementing studies on the effects of acute stress on learning. The results suggest that an understanding of the neurobiological correlates of trait variables like fluid IQ needs to take socioemotional influences such as chronic stress into account.},
  language  = {en}
}
% PhD thesis (Universitaet Potsdam): wireless in-network processing, with RL-based resource and microphone selection.
@phdthesis{Afifi2023,
  author    = {Afifi, Haitham},
  title     = {Wireless In-Network Processing for Multimedia Applications},
  school    = {Universit{\"a}t Potsdam},
  year      = {2023},
  pages     = {xiii, 233},
  doi       = {10.25932/publishup-60437},
  url       = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-604371},
  language  = {en},
  abstract  = {With the recent growth of sensors, cloud computing handles the data processing of many applications. Processing some of this data on the cloud raises, however, many concerns regarding, e.g., privacy, latency, or single points of failure. Alternatively, thanks to the development of embedded systems, smart wireless devices can share their computation capacity, creating a local wireless cloud for in-network processing. In this context, the processing of an application is divided into smaller jobs so that a device can run one or more jobs. The contribution of this thesis to this scenario is divided into three parts. In part one, I focus on wireless aspects, such as power control and interference management, for deciding which jobs to run on which node and how to route data between nodes. Hence, I formulate optimization problems and develop heuristic and meta-heuristic algorithms to allocate wireless and computation resources. Additionally, to deal with multiple applications competing for these resources, I develop a reinforcement learning (RL) admission controller to decide which application should be admitted. Next, I look into acoustic applications to improve wireless throughput by using microphone clock synchronization to synchronize wireless transmissions. In the second part, I jointly work with colleagues from the acoustic processing field to optimize both network and application (i.e., acoustic) qualities. My contribution focuses on the network part, where I study the relation between acoustic and network qualities when selecting a subset of microphones for collecting audio data or selecting a subset of optional jobs for processing these data; too many microphones or too many jobs can lessen quality by unnecessary delays. Hence, I develop RL solutions to select the subset of microphones under network constraints when the speaker is moving while still providing good acoustic quality. 
Furthermore, I show that autonomous vehicles carrying microphones improve the acoustic qualities of different applications. Accordingly, I develop RL solutions (single and multi-agent ones) for controlling these vehicles. In the third part, I close the gap between theory and practice. I describe the features of my open-source framework used as a proof of concept for wireless in-network processing. Next, I demonstrate how to run some algorithms developed by colleagues from acoustic processing using my framework. I also use the framework for studying in-network delays (wireless and processing) using different distributions of jobs and network topologies.}
}