% Doctoral thesis record (school field: Potsdam, year 2022); record language: en.
% The pages field is kept as exported ("xii, 160").
@phdthesis{Boeken2022,
  author   = {B{\"o}ken, Bj{\"o}rn},
  title    = {Improving prediction accuracy using dynamic information},
  school   = {Universit{\"a}t Potsdam},
  year     = {2022},
  pages    = {xii, 160},
  doi      = {10.25932/publishup-58512},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-585125},
  language = {en},
  abstract = {Accurately solving classification problems nowadays is likely to be the most relevant machine learning task. Binary classification separating two classes only is algorithmically simpler but has fewer potential applications as many real-world problems are multi-class. On the reverse, separating only a subset of classes simplifies the classification task. Even though existing multi-class machine learning algorithms are very flexible regarding the number of classes, they assume that the target set Y is fixed and cannot be restricted once the training is finished. On the other hand, existing state-of-the-art production environments are becoming increasingly interconnected with the advance of Industry 4.0 and related technologies such that additional information can simplify the respective classification problems. In light of this, the main aim of this thesis is to introduce dynamic classification that generalizes multi-class classification such that the target class set can be restricted arbitrarily to a non-empty class subset M of Y at any time between two consecutive predictions. This task is solved by a combination of two algorithmic approaches. First, classifier calibration, which transforms predictions into posterior probability estimates that are intended to be well calibrated. The analysis provided focuses on monotonic calibration and in particular corrects wrong statements that appeared in the literature. It also reveals that bin-based evaluation metrics, which became popular in recent years, are unjustified and should not be used at all. Next, the validity of Platt scaling, which is the most relevant parametric calibration approach, is analyzed in depth. In particular, its optimality for classifier predictions distributed according to four different families of probability distributions as well its equivalence with Beta calibration up to a sigmoidal preprocessing are proven. For non-monotonic calibration, extended variants on kernel density estimation and the ensemble method EKDE are introduced. Finally, the calibration techniques are evaluated using a simulation study with complete information as well as on a selection of 46 real-world data sets. Building on this, classifier calibration is applied as part of decomposition-based classification that aims to reduce multi-class problems to simpler (usually binary) prediction tasks. For the involved fusing step performed at prediction time, a new approach based on evidence theory is presented that uses classifier calibration to model mass functions. This allows the analysis of decomposition-based classification against a strictly formal background and to prove closed-form equations for the overall combinations. Furthermore, the same formalism leads to a consistent integration of dynamic class information, yielding a theoretically justified and computationally tractable dynamic classification model. The insights gained from this modeling are combined with pairwise coupling, which is one of the most relevant reduction-based classification approaches, such that all individual predictions are combined with a weight. This not only generalizes existing works on pairwise coupling but also enables the integration of dynamic class information. Lastly, a thorough empirical study is performed that compares all newly introduced approaches to existing state-of-the-art techniques. For this, evaluation metrics for dynamic classification are introduced that depend on corresponding sampling strategies. Thereafter, these are applied during a three-part evaluation. First, support vector machines and random forests are applied on 26 data sets from the UCI Machine Learning Repository. Second, two state-of-the-art deep neural networks are evaluated on five benchmark data sets from a relatively recent reference work. Here, computationally feasible strategies to apply the presented algorithms in combination with large-scale models are particularly relevant because a naive application is computationally intractable. Finally, reference data from a real-world process allowing the inclusion of dynamic class information are collected and evaluated. The results show that in combination with support vector machines and random forests, pairwise coupling approaches yield the best results, while in combination with deep neural networks, differences between the different approaches are mostly small to negligible. Most importantly, all results empirically confirm that dynamic classification succeeds in improving the respective prediction accuracies. Therefore, it is crucial to pass dynamic class information in respective applications, which requires an appropriate digital infrastructure.}
}

% Record in a university secondary-publication series; the series and journal
% fields carry the same series name, as exported. The page range uses the
% BibTeX double-hyphen form without surrounding spaces.
@misc{PrasseIversenLienhardetal.2022,
  author    = {Prasse, Paul and Iversen, Pascal and Lienhard, Matthias and Thedinga, Kristina and Herwig, Ralf and Scheffer, Tobias},
  title     = {Pre-Training on In Vitro and Fine-Tuning on Patient-Derived Data Improves Deep Neural Networks for Anti-Cancer Drug-Sensitivity Prediction},
  series    = {Zweitver{\"o}ffentlichungen der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe},
  journal   = {Zweitver{\"o}ffentlichungen der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe},
  publisher = {Universit{\"a}tsverlag Potsdam},
  address   = {Potsdam},
  year      = {2022},
  pages     = {1--14},
  issn      = {1866-8372},
  doi       = {10.25932/publishup-57734},
  url       = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-577341},
  language  = {en},
  abstract  = {Large-scale databases that report the inhibitory capacities of many combinations of candidate drug compounds and cultivated cancer cell lines have driven the development of preclinical drug-sensitivity models based on machine learning. However, cultivated cell lines have devolved from human cancer cells over years or even decades under selective pressure in culture conditions. Moreover, models that have been trained on in vitro data cannot account for interactions with other types of cells. Drug-response data that are based on patient-derived cell cultures, xenografts, and organoids, on the other hand, are not available in the quantities that are needed to train high-capacity machine-learning models. We found that pre-training deep neural network models of drug sensitivity on in vitro drug-sensitivity databases before fine-tuning the model parameters on patient-derived data improves the models' accuracy and improves the biological plausibility of the features, compared to training only on patient-derived data. From our experiments, we can conclude that pre-trained models outperform models that have been trained on the target domains in the vast majority of cases.}
}

% Record in the same secondary-publication series (number 1310); the series and
% journal fields carry the same series name, as exported.
@misc{AlLabanRegerLucke2022,
  author   = {Al Laban, Firas and Reger, Martin and Lucke, Ulrike},
  title    = {Closing the Policy Gap in the Academic Bridge},
  series   = {Zweitver{\"o}ffentlichungen der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe},
  journal  = {Zweitver{\"o}ffentlichungen der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe},
  number   = {1310},
  year     = {2022},
  pages    = {22},
  issn     = {1866-8372},
  doi      = {10.25932/publishup-58357},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-583572},
  language = {en},
  abstract = {The highly structured nature of the educational sector demands effective policy mechanisms close to the needs of the field. That is why evidence-based policy making, endorsed by the European Commission under Erasmus+ Key Action 3, aims to make an alignment between the domains of policy and practice. Against this background, this article addresses two issues: First, that there is a vertical gap in the translation of higher-level policies to local strategies and regulations. Second, that there is a horizontal gap between educational domains regarding the policy awareness of individual players. This was analyzed in quantitative and qualitative studies with domain experts from the fields of virtual mobility and teacher training. From our findings, we argue that the combination of both gaps puts the academic bridge from secondary to tertiary education at risk, including the associated knowledge proficiency levels. We discuss the role of digitalization in the academic bridge by asking the question: which value does the involved stakeholders expect from educational policies? As a theoretical basis, we rely on the model of value co-creation for and by stakeholders. We describe the used instruments along with the obtained results and proposed benefits. Moreover, we reflect on the methodology applied, and we finally derive recommendations for future academic bridge policies.}
}

% Master's thesis record (German). Typed as mastersthesis so that standard
% styles use the school and type fields. German nouns in the title are braced
% as whole words so that sentence-casing styles keep their capitals.
@mastersthesis{Cichalla2022,
  author   = {Cichalla, Anika Katleen},
  title    = {Ein konstruktivistisches {Modell} f{\"u}r die {Didaktik} der {Informatik} im {Bachelorstudium}},
  type     = {Master Thesis},
  school   = {Universit{\"a}t Potsdam},
  year     = {2022},
  pages    = {66},
  doi      = {10.25932/publishup-55071},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-550710},
  language = {de},
  abstract = {Lehrende in der Lehrkr{\"a}fteausbildung sind stets damit konfrontiert, dass sie den Studierenden innovative Methoden modernen Schulunterrichts traditionell rezipierend vorstellen. In Deutschland gibt es circa 40 Universit{\"a}ten, die Informatik mit Lehramtsbezug ausbilden. Allerdings gibt es nur wenige Konzepte, die sich mit der Verbindung von Bildungswissenschaften und der Informatik mit ihrer Didaktik besch{\"a}ftigen und keine Konzepte, die eine konstruktivistische Lehre in der Informatik verfolgen. Daher zielt diese Masterarbeit darauf ab, diese L{\"u}cke aufgreifen und anhand des {\quotedblbase}Didaktik der Informatik I{\textquotedblleft} Moduls der Universit{\"a}t Potsdam ein Modell zur konstruktivistischen Hochschullehre zu entwickeln. Dabei soll ein bestehendes konstruktivistisches Lehrmodell auf die Informatikdidaktik {\"u}bertragen und Elemente zur Verbindung von Bildungswissenschaften, Fachwissenschaften und Fachdidaktiken mit einbezogen werden. Dies kann eine Grundlage f{\"u}r die Planung von Informatikdidaktischen Modulen bieten, aber auch als Inspiration zur {\"U}bertragung bestehender innovativer Lehrkonzepte auf andere Fachdidaktiken dienen. Um ein solches konstruktivistisches Lehr-Lern-Modell zu erstellen, wird zun{\"a}chst der Zusammenhang von Bildungswissenschaften, Fachwissenschaften und Fachdidaktiken erl{\"a}utert und anschlie{\ss}end die Notwendigkeit einer Vernetzung hervorgehoben. Hieran folgt eine Darstellung zu relevanten Lerntheorien und bereits entwickelten innovativen Lernkonzepten. Ankn{\"u}pfend wird darauf eingegangen, welche Anforderungen die Kultusministerkonferenz an die Ausbildung von Lehrkr{\"a}ften stellt und wie diese Ausbildung f{\"u}r die Informatik momentan an der Universit{\"a}t Potsdam erfolgt. Aus allen Erkenntnissen heraus werden Anforderungen an ein konstruktivistisches Lehrmodell festgelegt. Unter Ber{\"u}cksichtigung der Voraussetzungen der Studienordnung f{\"u}r das Lehramt Informatik wird anschlie{\ss}end ein Modell f{\"u}r konstruktivistische Informatikdidaktik vorgestellt. Weiterf{\"u}hrende Forschung k{\"o}nnte sich damit auseinandersetzen, inwiefern sich die Motivation und Leistung im vergleich zum urspr{\"u}nglichen Modul {\"a}ndert und ob die Kompetenzen zur Unterrichtsplanung und Unterrichtsgestaltung durch das neue Modulkonzept st{\"a}rker ausgebaut werden k{\"o}nnen.}
}