@comment{
  Bibliography export, publication year 2022.
  Conventions used in this file:
    - one field per line, values delimited by braces;
    - non-ASCII letters written as LaTeX escapes in BibTeX special-character form;
    - page ranges written with a double hyphen and no surrounding spaces;
    - acronyms in titles protected by braces against style recasing;
    - series repeats the journal value, as delivered by the exporting repository.
}

@misc{AlLabanRegerLucke2022,
  author   = {Al Laban, Firas and Reger, Martin and Lucke, Ulrike},
  title    = {Closing the Policy Gap in the Academic Bridge},
  series   = {Zweitver{\"o}ffentlichungen der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe},
  journal  = {Zweitver{\"o}ffentlichungen der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe},
  number   = {1310},
  issn     = {1866-8372},
  doi      = {10.25932/publishup-58357},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-583572},
  pages    = {22},
  year     = {2022},
  abstract = {The highly structured nature of the educational sector demands effective policy mechanisms close to the needs of the field. That is why evidence-based policy making, endorsed by the European Commission under Erasmus+ Key Action 3, aims to make an alignment between the domains of policy and practice. Against this background, this article addresses two issues: First, that there is a vertical gap in the translation of higher-level policies to local strategies and regulations. Second, that there is a horizontal gap between educational domains regarding the policy awareness of individual players. This was analyzed in quantitative and qualitative studies with domain experts from the fields of virtual mobility and teacher training. From our findings, we argue that the combination of both gaps puts the academic bridge from secondary to tertiary education at risk, including the associated knowledge proficiency levels. We discuss the role of digitalization in the academic bridge by asking the question: which value does the involved stakeholders expect from educational policies? As a theoretical basis, we rely on the model of value co-creation for and by stakeholders. We describe the used instruments along with the obtained results and proposed benefits.
              Moreover, we reflect on the methodology applied, and we finally derive recommendations for future academic bridge policies.},
  language = {en},
}

@article{AlnoorTiberiusAtiyahetal.2022,
  author    = {Alnoor, Alhamzah and Tiberius, Victor and Atiyah, Abbas Gatea and Khaw, Khai Wah and Yin, Teh Sin and Chew, XinYing and Abbas, Sammar},
  title     = {How positive and negative electronic word of mouth ({eWOM}) affects customers' intention to use social commerce?},
  series    = {International journal of human computer interaction},
  journal   = {International journal of human computer interaction},
  publisher = {Taylor \& Francis},
  address   = {New York},
  issn      = {1044-7318},
  doi       = {10.1080/10447318.2022.2125610},
  pages     = {1--30},
  year      = {2022},
  abstract  = {Advances in Web 2.0 technologies have led to the widespread assimilation of electronic commerce platforms as an innovative shopping method and an alternative to traditional shopping. However, due to pro-technology bias, scholars focus more on adopting technology, and slightly less attention has been given to the impact of electronic word of mouth (eWOM) on customers' intention to use social commerce. This study addresses the gap by examining the intention through exploring the effect of eWOM on males' and females' intentions and identifying the mediation of perceived crowding. To this end, we adopted a dual-stage multi-group structural equation modeling and artificial neural network (SEM-ANN) approach. We successfully extended the eWOM concept by integrating negative and positive factors and perceived crowding. The results reveal the causal and non-compensatory relationships between the constructs. The variables supported by the SEM analysis are adopted as the ANN model's input neurons. According to the natural significance obtained from the ANN approach, males' intentions to accept social commerce are related mainly to helping the company, followed by core functionalities.
               In contrast, females are highly influenced by technical aspects and mishandling. The ANN model predicts customers' intentions to use social commerce with an accuracy of 97\%. We discuss the theoretical and practical implications of increasing customers' intention toward social commerce channels among consumers based on our findings.},
  language  = {en},
}

@phdthesis{Bartz2022,
  author   = {Bartz, Christian},
  title    = {Reducing the annotation burden: deep learning for optical character recognition using less manual annotations},
  doi      = {10.25932/publishup-55540},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-555407},
  school   = {Universit{\"a}t Potsdam},
  pages    = {xxiv, 183},
  year     = {2022},
  abstract = {Text is a ubiquitous entity in our world and daily life. We encounter it nearly everywhere in shops, on the street, or in our flats. Nowadays, more and more text is contained in digital images. These images are either taken using cameras, e.g., smartphone cameras, or taken using scanning devices such as document scanners. The sheer amount of available data, e.g., millions of images taken by Google Streetview, prohibits manual analysis and metadata extraction. Although much progress was made in the area of optical character recognition (OCR) for printed text in documents, broad areas of OCR are still not fully explored and hold many research challenges. With the mainstream usage of machine learning and especially deep learning, one of the most pressing problems is the availability and acquisition of annotated ground truth for the training of machine learning models because obtaining annotated training data using manual annotation mechanisms is time-consuming and costly. In this thesis, we address of how we can reduce the costs of acquiring ground truth annotations for the application of state-of-the-art machine learning methods to optical character recognition pipelines.
              To this end, we investigate how we can reduce the annotation cost by using only a fraction of the typically required ground truth annotations, e.g., for scene text recognition systems. We also investigate how we can use synthetic data to reduce the need of manual annotation work, e.g., in the area of document analysis for archival material. In the area of scene text recognition, we have developed a novel end-to-end scene text recognition system that can be trained using inexact supervision and shows competitive/state-of-the-art performance on standard benchmark datasets for scene text recognition. Our method consists of two independent neural networks, combined using spatial transformer networks. Both networks learn together to perform text localization and text recognition at the same time while only using annotations for the recognition task. We apply our model to end-to-end scene text recognition (meaning localization and recognition of words) and pure scene text recognition without any changes in the network architecture. In the second part of this thesis, we introduce novel approaches for using and generating synthetic data to analyze handwriting in archival data. First, we propose a novel preprocessing method to determine whether a given document page contains any handwriting. We propose a novel data synthesis strategy to train a classification model and show that our data synthesis strategy is viable by evaluating the trained model on real images from an archive. Second, we introduce the new analysis task of handwriting classification. Handwriting classification entails classifying a given handwritten word image into classes such as date, word, or number. Such an analysis step allows us to select the best fitting recognition model for subsequent text recognition; it also allows us to reason about the semantic content of a given document page without the need for fine-grained text recognition and further analysis steps, such as Named Entity Recognition.
              We show that our proposed approaches work well when trained on synthetic data. Further, we propose a flexible metric learning approach to allow zero-shot classification of classes unseen during the network's training. Last, we propose a novel data synthesis algorithm to train off-the-shelf pixel-wise semantic segmentation networks for documents. Our data synthesis pipeline is based on the famous Style-GAN architecture and can synthesize realistic document images with their corresponding segmentation annotation without the need for any annotated data!},
  language = {en},
}

@article{BenderKoerppen2022,
  author    = {Bender, Benedict and K{\"o}rppen, Tim},
  title     = {Integriert statt isoliert},
  series    = {Digital business : cloud},
  volume    = {26},
  journal   = {Digital business : cloud},
  number    = {1},
  publisher = {WIN-Verlag GmbH \& Co. KG},
  address   = {Vaterstetten},
  issn      = {2510-344X},
  pages     = {26--27},
  year      = {2022},
  abstract  = {Dass Daten und Analysen Innovationstreiber sind und nicht mehr nur einen Hygienefaktor darstellen, haben viele Unternehmen erkannt. Um Potenziale zu heben, m{\"u}ssen Daten zielf{\"u}hrend integriert werden. Komplexe Systemlandschaften und isolierte Datenbest{\"a}nde erschweren dies. Technologien f{\"u}r die erfolgreiche Umsetzung von datengetriebenem Management m{\"u}ssen richtig eingesetzt werden.},
  language  = {de},
}

@misc{BenlianWienerCrametal.2022,
  author   = {Benlian, Alexander and Wiener, Martin and Cram, W. Alec and Krasnova, Hanna and Maedche, Alexander and Mohlmann, Mareike and Recker, Jan and Remus, Ulrich},
  title    = {Algorithmic management},
  series   = {Zweitver{\"o}ffentlichungen der Universit{\"a}t Potsdam : Wirtschafts- und Sozialwissenschaftliche Reihe},
  journal  = {Zweitver{\"o}ffentlichungen der Universit{\"a}t Potsdam : Wirtschafts- und Sozialwissenschaftliche Reihe},
  number   = {6},
  issn     = {2363-7005},
  doi      = {10.25932/publishup-60711},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-607112},
  pages    = {17},
  year     = {2022},
  language = {en},
}

@comment{
  The journal article below originally carried the same key as the reprint
  entry above, so BibTeX reported a repeated entry and discarded it.
  The suffix a makes it citable; the reprint keeps the unsuffixed key.
}

@article{BenlianWienerCrametal.2022a,
  author    = {Benlian, Alexander and Wiener, Martin and Cram, W. Alec and Krasnova, Hanna and Maedche, Alexander and Mohlmann, Mareike and Recker, Jan and Remus, Ulrich},
  title     = {Algorithmic management},
  series    = {Business and information systems engineering},
  volume    = {64},
  journal   = {Business and information systems engineering},
  number    = {6},
  publisher = {Springer Gabler},
  address   = {Wiesbaden},
  issn      = {2363-7005},
  doi       = {10.1007/s12599-022-00764-w},
  pages     = {825--839},
  year      = {2022},
  language  = {en},
}

@article{BlaesiusFriedrichLischeidetal.2022,
  author    = {Bl{\"a}sius, Thomas and Friedrich, Tobias and Lischeid, Julius and Meeks, Kitty and Schirneck, Friedrich Martin},
  title     = {Efficiently enumerating hitting sets of hypergraphs arising in data profiling},
  series    = {Journal of computer and system sciences : JCSS},
  volume    = {124},
  journal   = {Journal of computer and system sciences : JCSS},
  publisher = {Elsevier},
  address   = {San Diego},
  issn      = {0022-0000},
  doi       = {10.1016/j.jcss.2021.10.002},
  pages     = {192--213},
  year      = {2022},
  abstract  = {The transversal hypergraph problem asks to enumerate the minimal hitting sets of a hypergraph. If the solutions have bounded size, Eiter and Gottlob [SICOMP'95] gave an algorithm running in output-polynomial time, but whose space requirement also scales with the output. We improve this to polynomial delay and space.
               Central to our approach is the extension problem, deciding for a set X of vertices whether it is contained in any minimal hitting set. We show that this is one of the first natural problems to be W[3]-complete. We give an algorithm for the extension problem running in time $O(m^{|X|+1} n)$ and prove a SETH-lower bound showing that this is close to optimal. We apply our enumeration method to the discovery problem of minimal unique column combinations from data profiling. Our empirical evaluation suggests that the algorithm outperforms its worst-case guarantees on hypergraphs stemming from real-world databases.},
  language  = {en},
}

@article{BonifatiMiorNaumannetal.2022,
  author    = {Bonifati, Angela and Mior, Michael J. and Naumann, Felix and Noack, Nele Sina},
  title     = {How inclusive are we?},
  series    = {SIGMOD record / Association for Computing Machinery, Special Interest Group on Management of Data},
  volume    = {50},
  journal   = {SIGMOD record / Association for Computing Machinery, Special Interest Group on Management of Data},
  number    = {4},
  publisher = {Association for Computing Machinery},
  address   = {New York},
  issn      = {0163-5808},
  doi       = {10.1145/3516431.3516438},
  pages     = {30--35},
  year      = {2022},
  abstract  = {ACM SIGMOD, VLDB and other database organizations have committed to fostering an inclusive and diverse community, as do many other scientific organizations. Recently, different measures have been taken to advance these goals, especially for underrepresented groups. One possible measure is double-blind reviewing, which aims to hide gender, ethnicity, and other properties of the authors.
               We report the preliminary results of a gender diversity analysis of publications of the database community across several peer-reviewed venues, and also compare women's authorship percentages in both single-blind and double-blind venues along the years. We also obtained a cross comparison of the obtained results in data management with other relevant areas in Computer Science.},
  language  = {en},
}

@phdthesis{Boeken2022,
  author   = {B{\"o}ken, Bj{\"o}rn},
  title    = {Improving prediction accuracy using dynamic information},
  doi      = {10.25932/publishup-58512},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-585125},
  school   = {Universit{\"a}t Potsdam},
  pages    = {xii, 160},
  year     = {2022},
  abstract = {Accurately solving classification problems nowadays is likely to be the most relevant machine learning task. Binary classification separating two classes only is algorithmically simpler but has fewer potential applications as many real-world problems are multi-class. On the reverse, separating only a subset of classes simplifies the classification task. Even though existing multi-class machine learning algorithms are very flexible regarding the number of classes, they assume that the target set Y is fixed and cannot be restricted once the training is finished. On the other hand, existing state-of-the-art production environments are becoming increasingly interconnected with the advance of Industry 4.0 and related technologies such that additional information can simplify the respective classification problems. In light of this, the main aim of this thesis is to introduce dynamic classification that generalizes multi-class classification such that the target class set can be restricted arbitrarily to a non-empty class subset M of Y at any time between two consecutive predictions. This task is solved by a combination of two algorithmic approaches.
              First, classifier calibration, which transforms predictions into posterior probability estimates that are intended to be well calibrated. The analysis provided focuses on monotonic calibration and in particular corrects wrong statements that appeared in the literature. It also reveals that bin-based evaluation metrics, which became popular in recent years, are unjustified and should not be used at all. Next, the validity of Platt scaling, which is the most relevant parametric calibration approach, is analyzed in depth. In particular, its optimality for classifier predictions distributed according to four different families of probability distributions as well its equivalence with Beta calibration up to a sigmoidal preprocessing are proven. For non-monotonic calibration, extended variants on kernel density estimation and the ensemble method EKDE are introduced. Finally, the calibration techniques are evaluated using a simulation study with complete information as well as on a selection of 46 real-world data sets. Building on this, classifier calibration is applied as part of decomposition-based classification that aims to reduce multi-class problems to simpler (usually binary) prediction tasks. For the involved fusing step performed at prediction time, a new approach based on evidence theory is presented that uses classifier calibration to model mass functions. This allows the analysis of decomposition-based classification against a strictly formal background and to prove closed-form equations for the overall combinations. Furthermore, the same formalism leads to a consistent integration of dynamic class information, yielding a theoretically justified and computationally tractable dynamic classification model. The insights gained from this modeling are combined with pairwise coupling, which is one of the most relevant reduction-based classification approaches, such that all individual predictions are combined with a weight.
              This not only generalizes existing works on pairwise coupling but also enables the integration of dynamic class information. Lastly, a thorough empirical study is performed that compares all newly introduced approaches to existing state-of-the-art techniques. For this, evaluation metrics for dynamic classification are introduced that depend on corresponding sampling strategies. Thereafter, these are applied during a three-part evaluation. First, support vector machines and random forests are applied on 26 data sets from the UCI Machine Learning Repository. Second, two state-of-the-art deep neural networks are evaluated on five benchmark data sets from a relatively recent reference work. Here, computationally feasible strategies to apply the presented algorithms in combination with large-scale models are particularly relevant because a naive application is computationally intractable. Finally, reference data from a real-world process allowing the inclusion of dynamic class information are collected and evaluated. The results show that in combination with support vector machines and random forests, pairwise coupling approaches yield the best results, while in combination with deep neural networks, differences between the different approaches are mostly small to negligible. Most importantly, all results empirically confirm that dynamic classification succeeds in improving the respective prediction accuracies.
              Therefore, it is crucial to pass dynamic class information in respective applications, which requires an appropriate digital infrastructure.},
  language = {en},
}

@article{ChenLangeAndjelkovicetal.2022,
  author    = {Chen, Junchao and Lange, Thomas and Andjelkovic, Marko and Simevski, Aleksandar and Lu, Li and Krsti{\'c}, Milo{\v{s}}},
  title     = {Solar particle event and single event upset prediction from {SRAM}-based monitor and supervised machine learning},
  series    = {IEEE transactions on emerging topics in computing / IEEE Computer Society, Institute of Electrical and Electronics Engineers},
  volume    = {10},
  journal   = {IEEE transactions on emerging topics in computing / IEEE Computer Society, Institute of Electrical and Electronics Engineers},
  number    = {2},
  publisher = {Institute of Electrical and Electronics Engineers},
  address   = {[New York, NY]},
  issn      = {2168-6750},
  doi       = {10.1109/TETC.2022.3147376},
  pages     = {564--580},
  year      = {2022},
  abstract  = {The intensity of cosmic radiation may differ over five orders of magnitude within a few hours or days during the Solar Particle Events (SPEs), thus increasing for several orders of magnitude the probability of Single Event Upsets (SEUs) in space-borne electronic systems. Therefore, it is vital to enable the early detection of the SEU rate changes in order to ensure timely activation of dynamic radiation hardening measures. In this paper, an embedded approach for the prediction of SPEs and SRAM SEU rate is presented. The proposed solution combines the real-time SRAM-based SEU monitor, the offline-trained machine learning model and online learning algorithm for the prediction.
               With respect to the state-of-the-art, our solution brings the following benefits: (1) Use of existing on-chip data storage SRAM as a particle detector, thus minimizing the hardware and power overhead, (2) Prediction of SRAM SEU rate one hour in advance, with the fine-grained hourly tracking of SEU variations during SPEs as well as under normal conditions, (3) Online optimization of the prediction model for enhancing the prediction accuracy during run-time, (4) Negligible cost of hardware accelerator design for the implementation of selected machine learning model and online learning algorithm. The proposed design is intended for a highly dependable and self-adaptive multiprocessing system employed in space applications, allowing to trigger the radiation mitigation mechanisms before the onset of high radiation levels.},
  language  = {en},
}