@phdthesis{Panzer2024, author = {Panzer, Marcel}, title = {Design of a hyper-heuristics based control framework for modular production systems}, doi = {10.25932/publishup-63300}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-633006}, school = {Universit{\"a}t Potsdam}, pages = {vi, 334}, year = {2024}, abstract = {Volatile supply and sales markets, coupled with increasing product individualization and complex production processes, present significant challenges for manufacturing companies. These must navigate and adapt to ever-shifting external and internal factors while ensuring robustness against process variabilities and unforeseen events. This has a pronounced impact on production control, which serves as the operational intersection between production planning and the shop- floor resources, and necessitates the capability to manage intricate process interdependencies effectively. Considering the increasing dynamics and product diversification, alongside the need to maintain constant production performances, the implementation of innovative control strategies becomes crucial. In recent years, the integration of Industry 4.0 technologies and machine learning methods has gained prominence in addressing emerging challenges in production applications. Within this context, this cumulative thesis analyzes deep learning based production systems based on five publications. Particular attention is paid to the applications of deep reinforcement learning, aiming to explore its potential in dynamic control contexts. Analysis reveal that deep reinforcement learning excels in various applications, especially in dynamic production control tasks. Its efficacy can be attributed to its interactive learning and real-time operational model. However, despite its evident utility, there are notable structural, organizational, and algorithmic gaps in the prevailing research. A predominant portion of deep reinforcement learning based approaches is limited to specific job shop scenarios and often overlooks the potential synergies in combined resources. Furthermore, it highlights the rare implementation of multi-agent systems and semi-heterarchical systems in practical settings. A notable gap remains in the integration of deep reinforcement learning into a hyper-heuristic. To bridge these research gaps, this thesis introduces a deep reinforcement learning based hyper- heuristic for the control of modular production systems, developed in accordance with the design science research methodology. Implemented within a semi-heterarchical multi-agent framework, this approach achieves a threefold reduction in control and optimisation complexity while ensuring high scalability, adaptability, and robustness of the system. In comparative benchmarks, this control methodology outperforms rule-based heuristics, reducing throughput times and tardiness, and effectively incorporates customer and order-centric metrics. The control artifact facilitates a rapid scenario generation, motivating for further research efforts and bridging the gap to real-world applications. The overarching goal is to foster a synergy between theoretical insights and practical solutions, thereby enriching scientific discourse and addressing current industrial challenges.}, language = {en} } @phdthesis{Maier2021, author = {Maier, Corinna}, title = {Bayesian data assimilation and reinforcement learning for model-informed precision dosing in oncology}, doi = {10.25932/publishup-51587}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-515870}, school = {Universit{\"a}t Potsdam}, pages = {x, 138}, year = {2021}, abstract = {While patients are known to respond differently to drug therapies, current clinical practice often still follows a standardized dosage regimen for all patients. For drugs with a narrow range of both effective and safe concentrations, this approach may lead to a high incidence of adverse events or subtherapeutic dosing in the presence of high patient variability. Model-informedprecision dosing (MIPD) is a quantitative approach towards dose individualization based on mathematical modeling of dose-response relationships integrating therapeutic drug/biomarker monitoring (TDM) data. MIPD may considerably improve the efficacy and safety of many drug therapies. Current MIPD approaches, however, rely either on pre-calculated dosing tables or on simple point predictions of the therapy outcome. These approaches lack a quantification of uncertainties and the ability to account for effects that are delayed. In addition, the underlying models are not improved while applied to patient data. Therefore, current approaches are not well suited for informed clinical decision-making based on a differentiated understanding of the individually predicted therapy outcome. The objective of this thesis is to develop mathematical approaches for MIPD, which (i) provide efficient fully Bayesian forecasting of the individual therapy outcome including associated uncertainties, (ii) integrate Markov decision processes via reinforcement learning (RL) for a comprehensive decision framework for dose individualization, (iii) allow for continuous learning across patients and hospitals. Cytotoxic anticancer chemotherapy with its major dose-limiting toxicity, neutropenia, serves as a therapeutically relevant application example. For more comprehensive therapy forecasting, we apply Bayesian data assimilation (DA) approaches, integrating patient-specific TDM data into mathematical models of chemotherapy-induced neutropenia that build on prior population analyses. The value of uncertainty quantification is demonstrated as it allows reliable computation of the patient-specific probabilities of relevant clinical quantities, e.g., the neutropenia grade. In view of novel home monitoring devices that increase the amount of TDM data available, the data processing of sequential DA methods proves to be more efficient and facilitates handling of the variability between dosing events. By transferring concepts from DA and RL we develop novel approaches for MIPD. While DA-guided dosing integrates individualized uncertainties into dose selection, RL-guided dosing provides a framework to consider delayed effects of dose selections. The combined DA-RL approach takes into account both aspects simultaneously and thus represents a holistic approach towards MIPD. Additionally, we show that RL can be used to gain insights into important patient characteristics for dose selection. The novel dosing strategies substantially reduce the occurrence of both subtherapeutic and life-threatening neutropenia grades in a simulation study based on a recent clinical study (CEPAC-TDM trial) compared to currently used MIPD approaches. If MIPD is to be implemented in routine clinical practice, a certain model bias with respect to the underlying model is inevitable, as the models are typically based on data from comparably small clinical trials that reflect only to a limited extent the diversity in real-world patient populations. We propose a sequential hierarchical Bayesian inference framework that enables continuous cross-patient learning to learn the underlying model parameters of the target patient population. It is important to note that the approach only requires summary information of the individual patient data to update the model. This separation of the individual inference from population inference enables implementation across different centers of care. The proposed approaches substantially improve current MIPD approaches, taking into account new trends in health care and aspects of practical applicability. They enable progress towards more informed clinical decision-making, ultimately increasing patient benefits beyond the current practice.}, language = {en} }