% Bibliography database: journal articles, books/reports, one book chapter and
% one PhD thesis. Text outside of entries (like these lines) is ignored by BibTeX.
%
% Conventions used in this file:
%   - one field per line, values delimited by braces;
%   - non-ASCII characters are written as LaTeX escapes wrapped in braces,
%     e.g. {\"o}, {\'e}, {\ss}, so that classic BibTeX sorts them correctly;
%   - words in titles that must keep their capitalisation are brace-protected;
%   - page ranges use a double hyphen without surrounding spaces;
%   - each abstract is kept on a single line.

% Journal article on combining neural-network and physics-based plasmasphere models.
% NOTE(review): pages holds a single number that looks like a page count rather
% than a page range or article number -- confirm against the publisher record.
@article{ZhelavskayaAseevShprits2021,
  author    = {Zhelavskaya, Irina and Aseev, Nikita and Shprits, Yuri},
  title     = {A combined neural network- and physics-based approach for modeling plasmasphere dynamics},
  journal   = {JGR / AGU, American Geophysical Union. Space Physics},
  volume    = {126},
  number    = {3},
  publisher = {Wiley},
  address   = {Hoboken, NJ},
  issn      = {2169-9380},
  doi       = {10.1029/2020JA028077},
  pages     = {30},
  year      = {2021},
  abstract  = {In recent years, feedforward neural networks (NNs) have been successfully applied to reconstruct global plasmasphere dynamics in the equatorial plane. These neural network-based models capture the large-scale dynamics of the plasmasphere, such as plume formation and erosion of the plasmasphere on the nightside. However, their performance depends strongly on the availability of training data. When the data coverage is limited or non-existent, as occurs during geomagnetic storms, the performance of NNs significantly decreases, as networks inherently cannot learn from the limited number of examples. This limitation can be overcome by employing physics-based modeling during strong geomagnetic storms. Physics-based models show a stable performance during periods of disturbed geomagnetic activity if they are correctly initialized and configured. In this study, we illustrate how to combine the neural network- and physics-based models of the plasmasphere in an optimal way by using data assimilation. The proposed approach utilizes advantages of both neural network- and physics-based modeling and produces global plasma density reconstructions for both quiet and disturbed geomagnetic activity, including extreme geomagnetic storms. We validate the models quantitatively by comparing their output to the in-situ density measurements from RBSP-A for an 18-month out-of-sample period from June 30, 2016 to January 01, 2018 and computing performance metrics. To validate the global density reconstructions qualitatively, we compare them to the IMAGE EUV images of the He+ particle distribution in the Earth's plasmasphere for a number of events in the past, including the Halloween storm in 2003.},
  language  = {en},
}

% Collected project reports of the HPI Future SOC Lab for 2017 (numbered report, no. 130).
% publisher is the university press; institution keeps the second publisher value
% that the record also listed (a field name may appear only once per entry).
% NOTE(review): number is given without a series name -- add the series once confirmed.
@book{ZhangPlauthEberhardtetal.2020,
  author      = {Zhang, Shuhao and Plauth, Max and Eberhardt, Felix and Polze, Andreas
                 and Lehmann, Jens and Sejdiu, Gezim and Jabeen, Hajira and Servadei, Lorenzo
                 and M{\"o}stl, Christian and B{\"a}r, Florian and Netzeband, Andr{\'e}
                 and Schmidt, Rainer and Knigge, Marlene and Hecht, Sonja and Prifti, Loina
                 and Krcmar, Helmut and Sapegin, Andrey and Jaeger, David and Cheng, Feng
                 and Meinel, Christoph and Friedrich, Tobias and Rothenberger, Ralf
                 and Sutton, Andrew M. and Sidorova, Julia A. and Lundberg, Lars
                 and Rosander, Oliver and Sk{\"o}ld, Lars and Di Varano, Igor
                 and van der Walt, Est{\'e}e and Eloff, Jan H. P. and Fabian, Benjamin
                 and Baumann, Annika and Ermakova, Tatiana and Kelkel, Stefan
                 and Choudhary, Yash and Cooray, Thilini and Rodr{\'\i}guez, Jorge
                 and Medina-P{\'e}rez, Miguel Angel and Trejo, Luis A.
                 and Barrera-Animas, Ari Yair and Monroy-Borja, Ra{\'u}l
                 and L{\'o}pez-Cuevas, Armando and Ram{\'\i}rez-M{\'a}rquez, Jos{\'e} Emmanuel
                 and Grohmann, Maria and Niederleithinger, Ernst and Podapati, Sasidhar
                 and Schmidt, Christopher and Huegle, Johannes and de Oliveira, Roberto C. L.
                 and Soares, F{\'a}bio Mendes and van Hoorn, Andr{\'e} and Neumer, Tamas
                 and Willnecker, Felix and Wilhelm, Mathias and Kuster, Bernhard},
  title       = {{HPI Future SOC Lab} - Proceedings 2017},
  number      = {130},
  editor      = {Meinel, Christoph and Polze, Andreas and Beins, Karsten and Strotmann, Rolf
                 and Seibold, Ulrich and R{\"o}dszus, Kurt and M{\"u}ller, J{\"u}rgen},
  publisher   = {Universit{\"a}tsverlag Potsdam},
  address     = {Potsdam},
  institution = {Universit{\"a}t Potsdam},
  isbn        = {978-3-86956-475-3},
  issn        = {1613-5652},
  doi         = {10.25932/publishup-43310},
  url         = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-433100},
  pages       = {ix, 235},
  year        = {2020},
  abstract    = {The "HPI Future SOC Lab" is a cooperation of the Hasso Plattner Institute (HPI) and industry partners. Its mission is to enable and promote exchange and interaction between the research community and the industry partners. The HPI Future SOC Lab provides researchers with free of charge access to a complete infrastructure of state of the art hard and software. This infrastructure includes components, which might be too expensive for an ordinary research environment, such as servers with up to 64 cores and 2 TB main memory. The offerings address researchers particularly from but not limited to the areas of computer science and business information systems. Main areas of research include cloud computing, parallelization, and In-Memory technologies. This technical report presents results of research projects executed in 2017. Selected projects have presented their results on April 25th and November 15th 2017 at the Future SOC Lab Day events.},
  language    = {en},
}

% PhD thesis on volcanic tremor analysis using music information retrieval techniques.
@phdthesis{Zali2023,
  author   = {Zali, Zahra},
  title    = {Volcanic tremor analysis based on advanced signal processing concepts including music information retrieval ({MIR}) strategies},
  school   = {Universit{\"a}t Potsdam},
  doi      = {10.25932/publishup-61086},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-610866},
  pages    = {viii, 95},
  year     = {2023},
  abstract = {Volcanoes are one of the Earth's most dynamic zones and responsible for many changes in our planet. Volcano seismology aims to provide an understanding of the physical processes in volcanic systems and anticipate the style and timing of eruptions by analyzing the seismic records. Volcanic tremor signals are usually observed in the seismic records before or during volcanic eruptions. Their analysis contributes to evaluate the evolving volcanic activity and potentially predict eruptions. Years of continuous seismic monitoring now provide useful information for operational eruption forecasting. The continuously growing amount of seismic recordings, however, poses a challenge for analysis, information extraction, and interpretation, to support timely decision making during volcanic crises. Furthermore, the complexity of eruption processes and precursory activities makes the analysis challenging. A challenge in studying seismic signals of volcanic origin is the coexistence of transient signal swarms and long-lasting volcanic tremor signals. Separating transient events from volcanic tremors can, therefore, contribute to improving our understanding of the underlying physical processes. Some similar issues (data reduction, source separation, extraction, and classification) are addressed in the context of music information retrieval (MIR). The signal characteristics of acoustic and seismic recordings comprise a number of similarities. This thesis is going beyond classical signal analysis techniques usually employed in seismology by exploiting similarities of seismic and acoustic signals and building the information retrieval strategy on the expertise developed in the field of MIR. First, inspired by the idea of harmonic-percussive separation (HPS) in musical signal processing, I have developed a method to extract harmonic volcanic tremor signals and to detect transient events from seismic recordings. This provides a clean tremor signal suitable for tremor investigation along with a characteristic function suitable for earthquake detection. Second, using HPS algorithms, I have developed a noise reduction technique for seismic signals. This method is especially useful for denoising ocean bottom seismometers, which are highly contaminated by noise. The advantage of this method compared to other denoising techniques is that it doesn't introduce distortion to the broadband earthquake waveforms, which makes it reliable for different applications in passive seismological analysis. Third, to address the challenge of extracting information from high-dimensional data and investigating the complex eruptive phases, I have developed an advanced machine learning model that results in a comprehensive signal processing scheme for volcanic tremors. Using this method seismic signatures of major eruptive phases can be automatically detected. This helps to provide a chronology of the volcanic system. Also, this model is capable to detect weak precursory volcanic tremors prior to the eruption, which could be used as an indicator of imminent eruptive activity. The extracted patterns of seismicity and their temporal variations finally provide an explanation for the transition mechanism between eruptive phases.},
  language = {en},
}

% Chapter (pp. 103--115) in an edited volume that is no. 3 of a book series; the
% record carries an ISBN and a publisher, so it is typed as a contribution to a
% collection. German nouns in the title are brace-protected against sentence-casing.
@incollection{WulffMientusNowaketal.2023,
  author    = {Wulff, Peter and Mientus, Lukas and Nowak, Anna and Borowski, Andreas},
  title     = {{KI}-basierte {Auswertung} von schriftlichen {Unterrichtsreflexionen} im {Fach} {Physik} und automatisierte {R{\"u}ckmeldung}},
  booktitle = {PSI-Potsdam: Ergebnisbericht zu den Aktivit{\"a}ten im Rahmen der Qualit{\"a}tsoffensive Lehrerbildung (2019-2023)},
  series    = {Potsdamer Beitr{\"a}ge zur Lehrerbildung und Bildungsforschung},
  number    = {3},
  publisher = {Universit{\"a}tsverlag Potsdam},
  address   = {Potsdam},
  isbn      = {978-3-86956-568-2},
  issn      = {2626-3556},
  doi       = {10.25932/publishup-61636},
  url       = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-616363},
  pages     = {103--115},
  year      = {2023},
  abstract  = {F{\"u}r die Entwicklung professioneller Handlungskompetenzen angehender Lehrkr{\"a}fte stellt die Unterrichtsreflexion ein wichtiges Instrument dar, um Theoriewissen und Praxiserfahrungen in Beziehung zu setzen. Die Auswertung von Unterrichtsreflexionen und eine entsprechende R{\"u}ckmeldung stellt Forschende und Dozierende allerdings vor praktische wie theoretische Herausforderungen. Im Kontext der Forschung zu K{\"u}nstlicher Intelligenz (KI) entwickelte Methoden bieten hier neue Potenziale. Der Beitrag stellt {\"u}berblicksartig zwei Teilstudien vor, die mit Hilfe von KI-Methoden wie dem maschinellen Lernen untersuchen, inwieweit eine Auswertung von Unterrichtsreflexionen angehender Physiklehrkr{\"a}fte auf Basis eines theoretisch abgeleiteten Reflexionsmodells und die automatisierte R{\"u}ckmeldung hierzu m{\"o}glich sind. Dabei wurden unterschiedliche Ans{\"a}tze des maschinellen Lernens verwendet, um modellbasierte Klassifikation und Exploration von Themen in Unterrichtsreflexionen umzusetzen. Die Genauigkeit der Ergebnisse wurde vor allem durch sog. Gro{\ss}e Sprachmodelle gesteigert, die auch den Transfer auf andere Standorte und F{\"a}cher erm{\"o}glichen. F{\"u}r die fachdidaktische Forschung bedeuten sie jedoch wiederum neue Herausforderungen, wie etwa systematische Verzerrungen und Intransparenz von Entscheidungen. Dennoch empfehlen wir, die Potenziale der KI-basierten Methoden gr{\"u}ndlicher zu erforschen und konsequent in der Praxis (etwa in Form von Webanwendungen) zu implementieren.},
  language  = {de},
}

% Journal article on automated classification of preservice teachers' written reflections.
@article{WulffBuschhueterWestphaletal.2020,
  author    = {Wulff, Peter and Buschh{\"u}ter, David and Westphal, Andrea and Nowak, Anna
               and Becker, Lisa and Robalino, Hugo and Stede, Manfred and Borowski, Andreas},
  title     = {Computer-based classification of preservice physics teachers' written reflections},
  journal   = {Journal of science education and technology},
  volume    = {30},
  number    = {1},
  publisher = {Springer},
  address   = {Dordrecht},
  issn      = {1059-0145},
  doi       = {10.1007/s10956-020-09865-1},
  pages     = {1--15},
  year      = {2020},
  abstract  = {Reflecting in written form on one's teaching enactments has been considered a facilitator for teachers' professional growth in university-based preservice teacher education. Writing a structured reflection can be facilitated through external feedback. However, researchers noted that feedback in preservice teacher education often relies on holistic, rather than more content-based, analytic feedback because educators oftentimes lack resources (e.g., time) to provide more analytic feedback. To overcome this impediment to feedback for written reflection, advances in computer technology can be of use. Hence, this study sought to utilize techniques of natural language processing and machine learning to train a computer-based classifier that classifies preservice physics teachers' written reflections on their teaching enactments in a German university teacher education program. To do so, a reflection model was adapted to physics education. It was then tested to what extent the computer-based classifier could accurately classify the elements of the reflection model in segments of preservice physics teachers' written reflections. Multinomial logistic regression using word count as a predictor was found to yield acceptable average human-computer agreement (F1-score on held-out test dataset of 0.56) so that it might fuel further development towards an automated feedback tool that supplements existing holistic feedback for written reflections with data-based, analytic feedback.},
  language  = {en},
}

% Journal article describing a market-sentiment model for finance-related tweets.
% NOTE(review): pages holds a single number that looks like a page count -- confirm.
@article{WilkschAbramova2023,
  author    = {Wilksch, Moritz and Abramova, Olga},
  title     = {{PyFin-sentiment}},
  journal   = {International journal of information management data insights},
  volume    = {3},
  number    = {1},
  publisher = {Elsevier},
  address   = {Amsterdam},
  issn      = {2667-0968},
  doi       = {10.1016/j.jjimei.2023.100171},
  pages     = {10},
  year      = {2023},
  abstract  = {Responding to the poor performance of generic automated sentiment analysis solutions on domain-specific texts, we collect a dataset of 10,000 tweets discussing the topics of finance and investing. We manually assign each tweet its market sentiment, i.e., the investor's anticipation of a stock's future return. Using this data, we show that all existing sentiment models trained on adjacent domains struggle with accurate market sentiment analysis due to the task's specialized vocabulary. Consequently, we design, train, and deploy our own sentiment model. It outperforms all previous models (VADER, NTUSD-Fin, FinBERT, TwitterRoBERTa) when evaluated on Twitter posts. On posts from a different platform, our model performs on par with BERT-based large language models. We achieve this result at a fraction of the training and inference costs due to the model's simple design. We publish the artifact as a python library to facilitate its use by future researchers and practitioners.},
  language  = {en},
}

% Numbered report (no. 153) on human pose estimation from pressure-mattress data.
% publisher is the university press; institution keeps the second publisher value
% that the record also listed (a field name may appear only once per entry).
% NOTE(review): number is given without a series name -- add the series once confirmed.
@book{Weber2023,
  author      = {Weber, Benedikt},
  title       = {Human pose estimation for decubitus prophylaxis},
  number      = {153},
  publisher   = {Universit{\"a}tsverlag Potsdam},
  address     = {Potsdam},
  institution = {Universit{\"a}t Potsdam},
  isbn        = {978-3-86956-551-4},
  issn        = {1613-5652},
  doi         = {10.25932/publishup-56719},
  url         = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-567196},
  pages       = {73},
  year        = {2023},
  abstract    = {Decubitus is one of the most relevant diseases in nursing and the most expensive to treat. It is caused by sustained pressure on tissue, so it particularly affects bed-bound patients. This work lays a foundation for pressure mattress-based decubitus prophylaxis by implementing a solution to the single-frame 2D Human Pose Estimation problem. For this, methods of Deep Learning are employed. Two approaches are examined, a coarse-to-fine Convolutional Neural Network for direct regression of joint coordinates and a U-Net for the derivation of probability distribution heatmaps. We conclude that training our models on a combined dataset of the publicly available Bodies at Rest and SLP data yields the best results. Furthermore, various preprocessing techniques are investigated, and a hyperparameter optimization is performed to discover an improved model architecture. Another finding indicates that the heatmap-based approach outperforms direct regression. This model achieves a mean per-joint position error of 9.11 cm for the Bodies at Rest data and 7.43 cm for the SLP data. We find that it generalizes well on data from mattresses other than those seen during training but has difficulties detecting the arms correctly. Additionally, we give a brief overview of the medical data annotation tool annoto we developed in the bachelor project and furthermore conclude that the Scrum framework and agile practices enhanced our development workflow.},
  language    = {en},
}

% Journal article on machine-learning prediction of COVID-19 mortality and critical events.
% NOTE(review): pages holds a single number that looks like a page count -- confirm.
@article{VaidSomaniRussaketal.2020,
  author    = {Vaid, Akhil and Somani, Sulaiman and Russak, Adam J. and De Freitas, Jessica K.
               and Chaudhry, Fayzan F. and Paranjpe, Ishan and Johnson, Kipp W.
               and Lee, Samuel J. and Miotto, Riccardo and Richter, Felix and Zhao, Shan
               and Beckmann, Noam D. and Naik, Nidhi and Kia, Arash and Timsina, Prem
               and Lala, Anuradha and Paranjpe, Manish and Golden, Eddye
               and Danieletto, Matteo and Singh, Manbir and Meyer, Dara
               and O'Reilly, Paul F. and Huckins, Laura and Kovatch, Patricia
               and Finkelstein, Joseph and Freeman, Robert M. and Argulian, Edgar
               and Kasarskis, Andrew and Percha, Bethany and Aberg, Judith A.
               and Bagiella, Emilia and Horowitz, Carol R. and Murphy, Barbara
               and Nestler, Eric J. and Schadt, Eric E. and Cho, Judy H.
               and Cordon-Cardo, Carlos and Fuster, Valentin and Charney, Dennis S.
               and Reich, David L. and B{\"o}ttinger, Erwin and Levin, Matthew A.
               and Narula, Jagat and Fayad, Zahi A. and Just, Allan C.
               and Charney, Alexander W. and Nadkarni, Girish N. and Glicksberg, Benjamin S.},
  title     = {Machine learning to predict mortality and critical events in a cohort of patients with {COVID-19} in {New York City}: model development and validation},
  journal   = {Journal of medical internet research : international scientific journal for medical research, information and communication on the internet ; JMIR},
  volume    = {22},
  number    = {11},
  publisher = {Healthcare World},
  address   = {Richmond, Va.},
  issn      = {1439-4456},
  doi       = {10.2196/24018},
  pages     = {19},
  year      = {2020},
  abstract  = {Background: COVID-19 has infected millions of people worldwide and is responsible for several hundred thousand fatalities. The COVID-19 pandemic has necessitated thoughtful resource allocation and early identification of high-risk patients. However, effective methods to meet these needs are lacking. Objective: The aims of this study were to analyze the electronic health records (EHRs) of patients who tested positive for COVID-19 and were admitted to hospitals in the Mount Sinai Health System in New York City; to develop machine learning models for making predictions about the hospital course of the patients over clinically meaningful time horizons based on patient characteristics at admission; and to assess the performance of these models at multiple hospitals and time points. Methods: We used Extreme Gradient Boosting (XGBoost) and baseline comparator models to predict in-hospital mortality and critical events at time windows of 3, 5, 7, and 10 days from admission. Our study population included harmonized EHR data from five hospitals in New York City for 4098 COVID-19-positive patients admitted from March 15 to May 22, 2020. The models were first trained on patients from a single hospital (n=1514) before or on May 1, externally validated on patients from four other hospitals (n=2201) before or on May 1, and prospectively validated on all patients after May 1 (n=383). Finally, we established model interpretability to identify and rank variables that drive model predictions. Results: Upon cross-validation, the XGBoost classifier outperformed baseline models, with an area under the receiver operating characteristic curve (AUC-ROC) for mortality of 0.89 at 3 days, 0.85 at 5 and 7 days, and 0.84 at 10 days. XGBoost also performed well for critical event prediction, with an AUC-ROC of 0.80 at 3 days, 0.79 at 5 days, 0.80 at 7 days, and 0.81 at 10 days. In external validation, XGBoost achieved an AUC-ROC of 0.88 at 3 days, 0.86 at 5 days, 0.86 at 7 days, and 0.84 at 10 days for mortality prediction. Similarly, the unimputed XGBoost model achieved an AUC-ROC of 0.78 at 3 days, 0.79 at 5 days, 0.80 at 7 days, and 0.81 at 10 days. Trends in performance on prospective validation sets were similar. At 7 days, acute kidney injury on admission, elevated LDH, tachypnea, and hyperglycemia were the strongest drivers of critical event prediction, while higher age, anion gap, and C-reactive protein were the strongest drivers of mortality prediction. Conclusions: We externally and prospectively trained and validated machine learning models for mortality and critical events for patients with COVID-19 at different time horizons. These models identified at-risk patients and uncovered underlying relationships that predicted outcomes.},
  language  = {en},
}

% Journal article on predicting acute dialysis requirement and death in COVID-19 patients.
% NOTE(review): organization is not a standard article field; it presumably names
% a study consortium -- confirm how it should be credited.
@article{VaidChanChaudharyetal.2021,
  author       = {Vaid, Akhil and Chan, Lili and Chaudhary, Kumardeep and Jaladanki, Suraj K.
                  and Paranjpe, Ishan and Russak, Adam J. and Kia, Arash and Timsina, Prem
                  and Levin, Matthew A. and He, John Cijiang and B{\"o}ttinger, Erwin
                  and Charney, Alexander W. and Fayad, Zahi A. and Coca, Steven G.
                  and Glicksberg, Benjamin S. and Nadkarni, Girish N.},
  title        = {Predictive approaches for acute dialysis requirement and death in {COVID-19}},
  journal      = {Clinical journal of the American Society of Nephrology : CJASN},
  volume       = {16},
  number       = {8},
  publisher    = {American Society of Nephrology},
  address      = {Washington},
  organization = {MSCIC},
  issn         = {1555-9041},
  doi          = {10.2215/CJN.17311120},
  pages        = {1158--1168},
  year         = {2021},
  abstract     = {Background and objectives AKI treated with dialysis initiation is a common complication of coronavirus disease 2019 (COVID-19) among hospitalized patients. However, dialysis supplies and personnel are often limited. Design, setting, participants, \& measurements Using data from adult patients hospitalized with COVID-19 from five hospitals from the Mount Sinai Health System who were admitted between March 10 and December 26, 2020, we developed and validated several models (logistic regression, Least Absolute Shrinkage and Selection Operator (LASSO), random forest, and eXtreme Gradient Boosting [XGBoost; with and without imputation]) for predicting treatment with dialysis or death at various time horizons (1, 3, 5, and 7 days) after hospital admission. Patients admitted to the Mount Sinai Hospital were used for internal validation, whereas the other hospitals formed part of the external validation cohort. Features included demographics, comorbidities, and laboratory and vital signs within 12 hours of hospital admission. Results A total of 6093 patients (2442 in training and 3651 in external validation) were included in the final cohort. Of the different modeling approaches used, XGBoost without imputation had the highest area under the receiver operating characteristic (AUROC) curve on internal validation (range of 0.93-0.98) and area under the precision-recall curve (AUPRC; range of 0.78-0.82) for all time points. XGBoost without imputation also had the highest test parameters on external validation (AUROC range of 0.85-0.87, and AUPRC range of 0.27-0.54) across all time windows. XGBoost without imputation outperformed all models with higher precision and recall (mean difference in AUROC of 0.04; mean difference in AUPRC of 0.15). Features of creatinine, BUN, and red cell distribution width were major drivers of the model's prediction. Conclusions An XGBoost model without imputation for prediction of a composite outcome of either death or dialysis in patients positive for COVID-19 had the best performance, as compared with standard and other machine learning models.},
  language     = {en},
}

% Review article on machine-learning approaches for genomic selection in crops.
% NOTE(review): pages holds a single number that looks like a page count -- confirm.
@article{TongNikoloski2020,
  author    = {Tong, Hao and Nikoloski, Zoran},
  title     = {Machine learning approaches for crop improvement},
  journal   = {Journal of plant physiology : biochemistry, physiology, molecular biology and biotechnology of plants},
  volume    = {257},
  publisher = {Elsevier},
  address   = {M{\"u}nchen},
  issn      = {0176-1617},
  doi       = {10.1016/j.jplph.2020.153354},
  pages     = {13},
  year      = {2020},
  abstract  = {Highly efficient and accurate selection of elite genotypes can lead to dramatic shortening of the breeding cycle in major crops relevant for sustaining present demands for food, feed, and fuel. In contrast to classical approaches that emphasize the need for resource-intensive phenotyping at all stages of artificial selection, genomic selection dramatically reduces the need for phenotyping. Genomic selection relies on advances in machine learning and the availability of genotyping data to predict agronomically relevant phenotypic traits. Here we provide a systematic review of machine learning approaches applied for genomic selection of single and multiple traits in major crops in the past decade. We emphasize the need to gather data on intermediate phenotypes, e.g. metabolite, protein, and gene expression levels, along with developments of modeling techniques that can lead to further improvements of genomic selection. In addition, we provide a critical view of factors that affect genomic selection, with attention to transferability of models between different environments. Finally, we highlight the future aspects of integrating high-throughput molecular phenotypic data from omics technologies with biological networks for crop improvement.},
  language  = {en},
}