@article{PrasseKnaebelMachlicaetal.2019, author = {Prasse, Paul and Knaebel, Rene and Machlica, Lukas and Pevny, Tomas and Scheffer, Tobias}, title = {Joint detection of malicious domains and infected clients}, series = {Machine learning}, volume = {108}, journal = {Machine learning}, number = {8-9}, publisher = {Springer}, address = {Dordrecht}, issn = {0885-6125}, doi = {10.1007/s10994-019-05789-z}, pages = {1353 -- 1368}, year = {2019}, abstract = {Detection of malware-infected computers and detection of malicious web domains based on their encrypted HTTPS traffic are challenging problems, because only addresses, timestamps, and data volumes are observable. The detection problems are coupled, because infected clients tend to interact with malicious domains. Traffic data can be collected at a large scale, and antivirus tools can be used to identify infected clients in retrospect. Domains, by contrast, have to be labeled individually after forensic analysis. We explore transfer learning based on sluice networks; this allows the detection models to bootstrap each other. In a large-scale experimental study, we find that the model outperforms known reference models and detects previously unknown malware, previously unknown malware families, and previously unknown malicious domains.}, language = {en} } @phdthesis{AbdelwahabHusseinAbdelwahabElsayed2019, author = {Abdelwahab Hussein Abdelwahab Elsayed, Ahmed}, title = {Probabilistic, deep, and metric learning for biometric identification from eye movements}, doi = {10.25932/publishup-46798}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-467980}, school = {Universit{\"a}t Potsdam}, pages = {vi, 65}, year = {2019}, abstract = {A central insight from psychological studies on human eye movements is that eye movement patterns are highly individually characteristic. They can, therefore, be used as a biometric feature, that is, subjects can be identified based on their eye movements. This thesis introduces new machine learning methods to identify subjects based on their eye movements while viewing arbitrary content. The thesis focuses on probabilistic modeling of the problem, which has yielded the best results in the most recent literature. The thesis studies the problem in three phases by proposing a purely probabilistic, probabilistic deep learning, and probabilistic deep metric learning approach. In the first phase, the thesis studies models that rely on psychological concepts about eye movements. Recent literature illustrates that individual-specific distributions of gaze patterns can be used to accurately identify individuals. In these studies, models were based on a simple parametric family of distributions. Such simple parametric models can be robustly estimated from sparse data, but have limited flexibility to capture the differences between individuals. Therefore, this thesis proposes a semiparametric model of gaze patterns that is flexible yet robust for individual identification. These patterns can be understood as domain knowledge derived from psychological literature. Fixations and saccades are examples of simple gaze patterns. The proposed semiparametric densities are drawn under a Gaussian process prior centered at a simple parametric distribution. Thus, the model will stay close to the parametric class of densities if little data is available, but it can also deviate from this class if enough data is available, increasing the flexibility of the model. The proposed method is evaluated on a large-scale dataset, showing significant improvements over the state-of-the-art. Later, the thesis replaces the model based on gaze patterns derived from psychological concepts with a deep neural network that can learn more informative and complex patterns from raw eye movement data. As previous work has shown that the distribution of these patterns across a sequence is informative, a novel statistical aggregation layer called the quantile layer is introduced. It explicitly fits the distribution of deep patterns learned directly from the raw eye movement data. The proposed deep learning approach is end-to-end learnable, such that the deep model learns to extract informative, short local patterns while the quantile layer learns to approximate the distributions of these patterns. Quantile layers are a generic approach that can converge to standard pooling layers or have a more detailed description of the features being pooled, depending on the problem. The proposed model is evaluated in a large-scale study using the eye movements of subjects viewing arbitrary visual input. The model improves upon the standard pooling layers and other statistical aggregation layers proposed in the literature. It also improves upon the state-of-the-art eye movement biometrics by a wide margin. Finally, for the model to identify any subject — not just the set of subjects it is trained on — a metric learning approach is developed. Metric learning learns a distance function over instances. The metric learning model maps the instances into a metric space, where sequences of the same individual are close, and sequences of different individuals are further apart. This thesis introduces a deep metric learning approach with distributional embeddings. The approach represents sequences as a set of continuous distributions in a metric space; to achieve this, a new loss function based on Wasserstein distances is introduced. The proposed method is evaluated on multiple domains besides eye movement biometrics. This approach outperforms the state of the art in deep metric learning in several domains while also outperforming the state of the art in eye movement biometrics.}, language = {en} } @misc{Strickroth2019, author = {Strickroth, Sven}, title = {PLATON}, series = {Postprints der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe}, journal = {Postprints der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe}, number = {804}, issn = {1866-8372}, doi = {10.25932/publishup-44188}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-441887}, pages = {28}, year = {2019}, abstract = {Lesson planning is both an important and demanding task—especially as part of teacher training. This paper presents the requirements for a lesson planning system and evaluates existing systems regarding these requirements. One major drawback of existing software tools is that most are limited to a text- or form-based representation of the lesson designs. In this article, a new approach with a graphical, time-based representation with (automatic) analyses methods is proposed and the system architecture and domain model are described in detail. The approach is implemented in an interactive, web-based prototype called PLATON, which additionally supports the management of lessons in units as well as the modelling of teacher and student-generated resources. The prototype was evaluated in a study with 61 prospective teachers (bachelor's and master's preservice teachers as well as teacher trainees in post-university teacher training) in Berlin, Germany, with a focus on usability. The results show that this approach proofed usable for lesson planning and offers positive effects for the perception of time and self-reflection.}, language = {en} } @article{MetrefCosmeLeSommeretal.2019, author = {Metref, Sammy and Cosme, Emmanuel and Le Sommer, Julien and Poel, Nora and Brankart, Jean-Michel and Verron, Jacques and Gomez Navarro, Laura}, title = {Reduction of spatially structured errors in Wide-Swath altimetric satellite data using data assimilation}, series = {Remote sensing}, volume = {11}, journal = {Remote sensing}, number = {11}, publisher = {MDPI}, address = {Basel}, issn = {2072-4292}, doi = {10.3390/rs11111336}, pages = {21}, year = {2019}, abstract = {The Surface Water and Ocean Topography (SWOT) mission is a next generation satellite mission expected to provide a 2 km-resolution observation of the sea surface height (SSH) on a two-dimensional swath. Processing SWOT data will be challenging because of the large amount of data, the mismatch between a high spatial resolution and a low temporal resolution, and the observation errors. The present paper focuses on the reduction of the spatially structured errors of SWOT SSH data. It investigates a new error reduction method and assesses its performance in an observing system simulation experiment. The proposed error-reduction method first projects the SWOT SSH onto a subspace spanned by the SWOT spatially structured errors. This projection is removed from the SWOT SSH to obtain a detrended SSH. The detrended SSH is then processed within an ensemble data assimilation analysis to retrieve a full SSH field. In the latter step, the detrending is applied to both the SWOT data and an ensemble of model-simulated SSH fields. Numerical experiments are performed with synthetic SWOT observations and an ensemble from a North Atlantic, 1/60 degrees simulation of the ocean circulation (NATL60). The data assimilation analysis is carried out with an ensemble Kalman filter. The results are assessed with root mean square errors, power spectrum density, and spatial coherence. They show that a significant part of the large scale SWOT errors is reduced. The filter analysis also reduces the small scale errors and allows for an accurate recovery of the energy of the signal down to 25 km scales. In addition, using the SWOT nadir data to adjust the SSH detrending further reduces the errors.}, language = {en} } @article{DimopoulosGebserLuehneetal.2019, author = {Dimopoulos, Yannis and Gebser, Martin and L{\"u}hne, Patrick and Romero Davila, Javier and Schaub, Torsten H.}, title = {plasp 3}, series = {Theory and practice of logic programming}, volume = {19}, journal = {Theory and practice of logic programming}, number = {3}, publisher = {Cambridge Univ. Press}, address = {New York}, issn = {1471-0684}, doi = {10.1017/S1471068418000583}, pages = {477 -- 504}, year = {2019}, abstract = {We describe the new version of the Planning Domain Definition Language (PDDL)-to-Answer Set Programming (ASP) translator plasp. First, it widens the range of accepted PDDL features. Second, it contains novel planning encodings, some inspired by Satisfiability Testing (SAT) planning and others exploiting ASP features such as well-foundedness. All of them are designed for handling multivalued fluents in order to capture both PDDL as well as SAS planning formats. Third, enabled by multishot ASP solving, it offers advanced planning algorithms also borrowed from SAT planning. As a result, plasp provides us with an ASP-based framework for studying a variety of planning techniques in a uniform setting. Finally, we demonstrate in an empirical analysis that these techniques have a significant impact on the performance of ASP planning.}, language = {en} } @misc{Przybylla2019, author = {Przybylla, Mareen}, title = {Interactive objects in physical computing and their role in the learning process}, series = {Constructivist foundations}, volume = {14}, journal = {Constructivist foundations}, number = {3}, publisher = {Vrije Univ.}, address = {Bussels}, issn = {1782-348X}, pages = {264 -- 266}, year = {2019}, abstract = {The target article discusses the question of how educational makerspaces can become places supportive of knowledge construction. This question is too often neglected by people who run makerspaces, as they mostly explain how to use different tools and focus on the creation of a product. In makerspaces, often pupils also engage in physical computing activities and thus in the creation of interactive artifacts containing embedded systems, such as smart shoes or wristbands, plant monitoring systems or drink mixing machines. This offers the opportunity to reflect on teaching physical computing in computer science education, where similarly often the creation of the product is so strongly focused upon that the reflection of the learning process is pushed into the background.}, language = {en} } @article{PousttchiGleiss2019, author = {Pousttchi, Key and Gleiß, Alexander}, title = {Surrounded by middlemen - how multi-sided platforms change the insurance industry}, series = {Electron Markets}, volume = {29}, journal = {Electron Markets}, number = {4}, publisher = {Springer}, address = {Heidelberg}, issn = {1019-6781}, doi = {10.1007/s12525-019-00363-w}, pages = {609 -- 629}, year = {2019}, abstract = {Multi-sided platforms (MSP) strongly affect markets and play a crucial part within the digital and networked economy. Although empirical evidence indicates their occurrence in many industries, research has not investigated the game-changing impact of MSP on traditional markets to a sufficient extent. More specifically, we have little knowledge of how MSP affect value creation and customer interaction in entire markets, exploiting the potential of digital technologies to offer new value propositions. Our paper addresses this research gap and provides an initial systematic approach to analyze the impact of MSP on the insurance industry. For this purpose, we analyze the state of the art in research and practice in order to develop a reference model of the value network for the insurance industry. On this basis, we conduct a case-study analysis to discover and analyze roles which are occupied or even newly created by MSP. As a final step, we categorize MSP with regard to their relation to traditional insurance companies, resulting in a classification scheme with four MSP standard types: Competition, Coordination, Cooperation, Collaboration.}, language = {en} } @article{WaitelonisJuergesSack2019, author = {Waitelonis, J{\"o}rg and J{\"u}rges, Henrik and Sack, Harald}, title = {Remixing entity linking evaluation datasets for focused benchmarking}, series = {Semantic Web}, volume = {10}, journal = {Semantic Web}, number = {2}, publisher = {IOS Press}, address = {Amsterdam}, issn = {1570-0844}, doi = {10.3233/SW-180334}, pages = {385 -- 412}, year = {2019}, abstract = {In recent years, named entity linking (NEL) tools were primarily developed in terms of a general approach, whereas today numerous tools are focusing on specific domains such as e.g. the mapping of persons and organizations only, or the annotation of locations or events in microposts. However, the available benchmark datasets necessary for the evaluation of NEL tools do not reflect this focalizing trend. We have analyzed the evaluation process applied in the NEL benchmarking framework GERBIL [in: Proceedings of the 24th International Conference on World Wide Web (WWW'15), International World Wide Web Conferences Steering Committee, Republic and Canton of Geneva, Switzerland, 2015, pp. 1133-1143, Semantic Web 9(5) (2018), 605-625] and all its benchmark datasets. Based on these insights we have extended the GERBIL framework to enable a more fine grained evaluation and in depth analysis of the available benchmark datasets with respect to different emphases. This paper presents the implementation of an adaptive filter for arbitrary entities and customized benchmark creation as well as the automated determination of typical NEL benchmark dataset properties, such as the extent of content-related ambiguity and diversity. These properties are integrated on different levels, which also enables to tailor customized new datasets out of the existing ones by remixing documents based on desired emphases. Besides a new system library to enrich provided NIF [in: International Semantic Web Conference (ISWC'13), Lecture Notes in Computer Science, Vol. 8219, Springer, Berlin, Heidelberg, 2013, pp. 98-113] datasets with statistical information, best practices for dataset remixing are presented, and an in depth analysis of the performance of entity linking systems on special focus datasets is presented.}, language = {en} } @article{FriouxSchaubSchellhornetal.2019, author = {Frioux, Cl{\´e}mence and Schaub, Torsten H. and Schellhorn, Sebastian and Siegel, Anne and Wanko, Philipp}, title = {Hybrid metabolic network completion}, series = {Theory and practice of logic programming}, volume = {19}, journal = {Theory and practice of logic programming}, number = {1}, publisher = {Cambridge University Press}, address = {New York}, issn = {1471-0684}, doi = {10.1017/S1471068418000455}, pages = {83 -- 108}, year = {2019}, abstract = {Metabolic networks play a crucial role in biology since they capture all chemical reactions in an organism. While there are networks of high quality for many model organisms, networks for less studied organisms are often of poor quality and suffer from incompleteness. To this end, we introduced in previous work an answer set programming (ASP)-based approach to metabolic network completion. Although this qualitative approach allows for restoring moderately degraded networks, it fails to restore highly degraded ones. This is because it ignores quantitative constraints capturing reaction rates. To address this problem, we propose a hybrid approach to metabolic network completion that integrates our qualitative ASP approach with quantitative means for capturing reaction rates. We begin by formally reconciling existing stoichiometric and topological approaches to network completion in a unified formalism. With it, we develop a hybrid ASP encoding and rely upon the theory reasoning capacities of the ASP system dingo for solving the resulting logic program with linear constraints over reals. We empirically evaluate our approach by means of the metabolic network of Escherichia coli. Our analysis shows that our novel approach yields greatly superior results than obtainable from purely qualitative or quantitative approaches.}, language = {en} } @misc{FichteHecherMeier2019, author = {Fichte, Johannes Klaus and Hecher, Markus and Meier, Arne}, title = {Counting Complexity for Reasoning in Abstract Argumentation}, series = {The Thirty-Third AAAI Conference on Artificial Intelligence, the Thirty-First Innovative Applications of Artificial Intelligence Conference, the Ninth AAAI Symposium on Educational Advances in Artificial Intelligence}, journal = {The Thirty-Third AAAI Conference on Artificial Intelligence, the Thirty-First Innovative Applications of Artificial Intelligence Conference, the Ninth AAAI Symposium on Educational Advances in Artificial Intelligence}, publisher = {AAAI Press}, address = {Palo Alto}, isbn = {978-1-57735-809-1}, pages = {2827 -- 2834}, year = {2019}, abstract = {In this paper, we consider counting and projected model counting of extensions in abstract argumentation for various semantics. When asking for projected counts we are interested in counting the number of extensions of a given argumentation framework while multiple extensions that are identical when restricted to the projected arguments count as only one projected extension. We establish classical complexity results and parameterized complexity results when the problems are parameterized by treewidth of the undirected argumentation graph. To obtain upper bounds for counting projected extensions, we introduce novel algorithms that exploit small treewidth of the undirected argumentation graph of the input instance by dynamic programming (DP). Our algorithms run in time double or triple exponential in the treewidth depending on the considered semantics. Finally, we take the exponential time hypothesis (ETH) into account and establish lower bounds of bounded treewidth algorithms for counting extensions and projected extension.}, language = {en} }