@phdthesis{AbdelwahabHusseinAbdelwahabElsayed2019, author = {Abdelwahab Hussein Abdelwahab Elsayed, Ahmed}, title = {Probabilistic, deep, and metric learning for biometric identification from eye movements}, doi = {10.25932/publishup-46798}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-467980}, school = {Universit{\"a}t Potsdam}, pages = {vi, 65}, year = {2019}, abstract = {A central insight from psychological studies on human eye movements is that eye movement patterns are highly individually characteristic. They can, therefore, be used as a biometric feature, that is, subjects can be identified based on their eye movements. This thesis introduces new machine learning methods to identify subjects based on their eye movements while viewing arbitrary content. The thesis focuses on probabilistic modeling of the problem, which has yielded the best results in the most recent literature. The thesis studies the problem in three phases by proposing a purely probabilistic, probabilistic deep learning, and probabilistic deep metric learning approach. In the first phase, the thesis studies models that rely on psychological concepts about eye movements. Recent literature illustrates that individual-specific distributions of gaze patterns can be used to accurately identify individuals. In these studies, models were based on a simple parametric family of distributions. Such simple parametric models can be robustly estimated from sparse data, but have limited flexibility to capture the differences between individuals. Therefore, this thesis proposes a semiparametric model of gaze patterns that is flexible yet robust for individual identification. These patterns can be understood as domain knowledge derived from psychological literature. Fixations and saccades are examples of simple gaze patterns. The proposed semiparametric densities are drawn under a Gaussian process prior centered at a simple parametric distribution. Thus, the model will stay close to the parametric class of densities if little data is available, but it can also deviate from this class if enough data is available, increasing the flexibility of the model. The proposed method is evaluated on a large-scale dataset, showing significant improvements over the state-of-the-art. Later, the thesis replaces the model based on gaze patterns derived from psychological concepts with a deep neural network that can learn more informative and complex patterns from raw eye movement data. As previous work has shown that the distribution of these patterns across a sequence is informative, a novel statistical aggregation layer called the quantile layer is introduced. It explicitly fits the distribution of deep patterns learned directly from the raw eye movement data. The proposed deep learning approach is end-to-end learnable, such that the deep model learns to extract informative, short local patterns while the quantile layer learns to approximate the distributions of these patterns. Quantile layers are a generic approach that can converge to standard pooling layers or have a more detailed description of the features being pooled, depending on the problem. The proposed model is evaluated in a large-scale study using the eye movements of subjects viewing arbitrary visual input. The model improves upon the standard pooling layers and other statistical aggregation layers proposed in the literature. It also improves upon the state-of-the-art eye movement biometrics by a wide margin. Finally, for the model to identify any subject — not just the set of subjects it is trained on — a metric learning approach is developed. Metric learning learns a distance function over instances. The metric learning model maps the instances into a metric space, where sequences of the same individual are close, and sequences of different individuals are further apart. This thesis introduces a deep metric learning approach with distributional embeddings. The approach represents sequences as a set of continuous distributions in a metric space; to achieve this, a new loss function based on Wasserstein distances is introduced. The proposed method is evaluated on multiple domains besides eye movement biometrics. This approach outperforms the state of the art in deep metric learning in several domains while also outperforming the state of the art in eye movement biometrics.}, language = {en} } @phdthesis{Ahmad2014, author = {Ahmad, Nadeem}, title = {People centered HMI's for deaf and functionally illiterate users}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-70391}, school = {Universit{\"a}t Potsdam}, year = {2014}, abstract = {The objective and motivation behind this research is to provide applications with easy-to-use interfaces to communities of deaf and functionally illiterate users, which enables them to work without any human assistance. Although recent years have witnessed technological advancements, the availability of technology does not ensure accessibility to information and communication technologies (ICT). Extensive use of text from menus to document contents means that deaf or functionally illiterate can not access services implemented on most computer software. Consequently, most existing computer applications pose an accessibility barrier to those who are unable to read fluently. Online technologies intended for such groups should be developed in continuous partnership with primary users and include a thorough investigation into their limitations, requirements and usability barriers. In this research, I investigated existing tools in voice, web and other multimedia technologies to identify learning gaps and explored ways to enhance the information literacy for deaf and functionally illiterate users. I worked on the development of user-centered interfaces to increase the capabilities of deaf and low literacy users by enhancing lexical resources and by evaluating several multimedia interfaces for them. The interface of the platform-independent Italian Sign Language (LIS) Dictionary has been developed to enhance the lexical resources for deaf users. The Sign Language Dictionary accepts Italian lemmas as input and provides their representation in the Italian Sign Language as output. The Sign Language dictionary has 3082 signs as set of Avatar animations in which each sign is linked to a corresponding Italian lemma. I integrated the LIS lexical resources with MultiWordNet (MWN) database to form the first LIS MultiWordNet(LMWN). LMWN contains information about lexical relations between words, semantic relations between lexical concepts (synsets), correspondences between Italian and sign language lexical concepts and semantic fields (domains). The approach enhances the deaf users' understanding of written Italian language and shows that a relatively small set of lexicon can cover a significant portion of MWN. Integration of LIS signs with MWN made it useful tool for computational linguistics and natural language processing. The rule-based translation process from written Italian text to LIS has been transformed into service-oriented system. The translation process is composed of various modules including parser, semantic interpreter, generator, and spatial allocation planner. This translation procedure has been implemented in the Java Application Building Center (jABC), which is a framework for extreme model driven design (XMDD). The XMDD approach focuses on bringing software development closer to conceptual design, so that the functionality of a software solution could be understood by someone who is unfamiliar with programming concepts. The transformation addresses the heterogeneity challenge and enhances the re-usability of the system. For enhancing the e-participation of functionally illiterate users, two detailed studies were conducted in the Republic of Rwanda. In the first study, the traditional (textual) interface was compared with the virtual character-based interactive interface. The study helped to identify usability barriers and users evaluated these interfaces according to three fundamental areas of usability, i.e. effectiveness, efficiency and satisfaction. In another study, we developed four different interfaces to analyze the usability and effects of online assistance (consistent help) for functionally illiterate users and compared different help modes including textual, vocal and virtual character on the performance of semi-literate users. In our newly designed interfaces the instructions were automatically translated in Swahili language. All the interfaces were evaluated on the basis of task accomplishment, time consumption, System Usability Scale (SUS) rating and number of times the help was acquired. The results show that the performance of semi-literate users improved significantly when using the online assistance. The dissertation thus introduces a new development approach in which virtual characters are used as additional support for barely literate or naturally challenged users. Such components enhanced the application utility by offering a variety of services like translating contents in local language, providing additional vocal information, and performing automatic translation from text to sign language. Obviously, there is no such thing as one design solution that fits for all in the underlying domain. Context sensitivity, literacy and mental abilities are key factors on which I concentrated and the results emphasize that computer interfaces must be based on a thoughtful definition of target groups, purposes and objectives.}, language = {en} } @phdthesis{AlAreqi2017, author = {Al-Areqi, Samih Taha Mohammed}, title = {Semantics-based automatic geospatial service composition}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-402616}, school = {Universit{\"a}t Potsdam}, pages = {xvi, 163}, year = {2017}, abstract = {Although it has become common practice to build applications based on the reuse of existing components or services, technical complexity and semantic challenges constitute barriers to ensuring a successful and wide reuse of components and services. In the geospatial application domain, the barriers are self-evident due to heterogeneous geographic data, a lack of interoperability and complex analysis processes. Constructing workflows manually and discovering proper services and data that match user intents and preferences is difficult and time-consuming especially for users who are not trained in software development. Furthermore, considering the multi-objective nature of environmental modeling for the assessment of climate change impacts and the various types of geospatial data (e.g., formats, scales, and georeferencing systems) increases the complexity challenges. Automatic service composition approaches that provide semantics-based assistance in the process of workflow design have proven to be a solution to overcome these challenges and have become a frequent demand especially by end users who are not IT experts. In this light, the major contributions of this thesis are: (i) Simplification of service reuse and workflow design of applications for climate impact analysis by following the eXtreme Model-Driven Development (XMDD) paradigm. (ii) Design of a semantic domain model for climate impact analysis applications that comprises specifically designed services, ontologies that provide domain-specific vocabulary for referring to types and services, and the input/output annotation of the services using the terms defined in the ontologies. (iii) Application of a constraint-driven method for the automatic composition of workflows for analyzing the impacts of sea-level rise. The application scenario demonstrates the impact of domain modeling decisions on the results and the performance of the synthesis algorithm.}, language = {en} } @phdthesis{AlSaffar2016, author = {Al-Saffar, Loay Talib Ahmed}, title = {Analysing prerequisites, expectations, apprehensions, and attitudes of university students studying Computer science}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-98437}, school = {Universit{\"a}t Potsdam}, pages = {xii, 131}, year = {2016}, abstract = {The main objective of this dissertation is to analyse prerequisites, expectations, apprehensions, and attitudes of students studying computer science, who are willing to gain a bachelor degree. The research will also investigate in the students' learning style according to the Felder-Silverman model. These investigations fall in the attempt to make an impact on reducing the "dropout"/shrinkage rate among students, and to suggest a better learning environment. The first investigation starts with a survey that has been made at the computer science department at the University of Baghdad to investigate the attitudes of computer science students in an environment dominated by women, showing the differences in attitudes between male and female students in different study years. Students are accepted to university studies via a centrally controlled admission procedure depending mainly on their final score at school. This leads to a high percentage of students studying subjects they do not want. Our analysis shows that 75\% of the female students do not regret studying computer science although it was not their first choice. And according to statistics over previous years, women manage to succeed in their study and often graduate on top of their class. We finish with a comparison of attitudes between the freshman students of two different cultures and two different university enrolment procedures (University of Baghdad, in Iraq, and the University of Potsdam, in Germany) both with opposite gender majority. The second step of investigation took place at the department of computer science at the University of Potsdam in Germany and analyzes the learning styles of students studying the three major fields of study offered by the department (computer science, business informatics, and computer science teaching). Investigating the differences in learning styles between the students of those study fields who usually take some joint courses is important to be aware of which changes are necessary to be adopted in the teaching methods to address those different students. It was a two stage study using two questionnaires; the main one is based on the Index of Learning Styles Questionnaire of B. A. Solomon and R. M. Felder, and the second questionnaire was an investigation on the students' attitudes towards the findings of their personal first questionnaire. Our analysis shows differences in the preferences of learning style between male and female students of the different study fields, as well as differences between students with the different specialties (computer science, business informatics, and computer science teaching). The third investigation looks closely into the difficulties, issues, apprehensions and expectations of freshman students studying computer science. The study took place at the computer science department at the University of Potsdam with a volunteer sample of students. The goal is to determine and discuss the difficulties and issues that they are facing in their study that may lead them to think in dropping-out, changing the study field, or changing the university. The research continued with the same sample of students (with business informatics students being the majority) through more than three semesters. Difficulties and issues during the study were documented, as well as students' attitudes, apprehensions, and expectations. Some of the professors and lecturers opinions and solutions to some students' problems were also documented. Many participants had apprehensions and difficulties, especially towards informatics subjects. Some business informatics participants began to think of changing the university, in particular when they reached their third semester, others thought about changing their field of study. Till the end of this research, most of the participants continued in their studies (the study they have started with or the new study they have changed to) without leaving the higher education system.}, language = {en} } @phdthesis{Andjelkovic2021, author = {Andjelkovic, Marko}, title = {A methodology for characterization, modeling and mitigation of single event transient effects in CMOS standard combinational cells}, doi = {10.25932/publishup-53484}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-534843}, school = {Universit{\"a}t Potsdam}, pages = {xxiv, 216}, year = {2021}, abstract = {With the downscaling of CMOS technologies, the radiation-induced Single Event Transient (SET) effects in combinational logic have become a critical reliability issue for modern integrated circuits (ICs) intended for operation under harsh radiation conditions. The SET pulses generated in combinational logic may propagate through the circuit and eventually result in soft errors. It has thus become an imperative to address the SET effects in the early phases of the radiation-hard IC design. In general, the soft error mitigation solutions should accommodate both static and dynamic measures to ensure the optimal utilization of available resources. An efficient soft-error-aware design should address synergistically three main aspects: (i) characterization and modeling of soft errors, (ii) multi-level soft error mitigation, and (iii) online soft error monitoring. Although significant results have been achieved, the effectiveness of SET characterization methods, accuracy of predictive SET models, and efficiency of SET mitigation measures are still critical issues. Therefore, this work addresses the following topics: (i) Characterization and modeling of SET effects in standard combinational cells, (ii) Static mitigation of SET effects in standard combinational cells, and (iii) Online particle detection, as a support for dynamic soft error mitigation. Since the standard digital libraries are widely used in the design of radiation-hard ICs, the characterization of SET effects in standard cells and the availability of accurate SET models for the Soft Error Rate (SER) evaluation are the main prerequisites for efficient radiation-hard design. This work introduces an approach for the SPICE-based standard cell characterization with the reduced number of simulations, improved SET models and optimized SET sensitivity database. It has been shown that the inherent similarities in the SET response of logic cells for different input levels can be utilized to reduce the number of required simulations. Based on characterization results, the fitting models for the SET sensitivity metrics (critical charge, generated SET pulse width and propagated SET pulse width) have been developed. The proposed models are based on the principle of superposition, and they express explicitly the dependence of the SET sensitivity of individual combinational cells on design, operating and irradiation parameters. In contrast to the state-of-the-art characterization methodologies which employ extensive look-up tables (LUTs) for storing the simulation results, this work proposes the use of LUTs for storing the fitting coefficients of the SET sensitivity models derived from the characterization results. In that way the amount of characterization data in the SET sensitivity database is reduced significantly. The initial step in enhancing the robustness of combinational logic is the application of gate-level mitigation techniques. As a result, significant improvement of the overall SER can be achieved with minimum area, delay and power overheads. For the SET mitigation in standard cells, it is essential to employ the techniques that do not require modifying the cell structure. This work introduces the use of decoupling cells for improving the robustness of standard combinational cells. By insertion of two decoupling cells at the output of a target cell, the critical charge of the cell's output node is increased and the attenuation of short SETs is enhanced. In comparison to the most common gate-level techniques (gate upsizing and gate duplication), the proposed approach provides better SET filtering. However, as there is no single gate-level mitigation technique with optimal performance, a combination of multiple techniques is required. This work introduces a comprehensive characterization of gate-level mitigation techniques aimed to quantify their impact on the SET robustness improvement, as well as introduced area, delay and power overhead per gate. By characterizing the gate-level mitigation techniques together with the standard cells, the required effort in subsequent SER analysis of a target design can be reduced. The characterization database of the hardened standard cells can be utilized as a guideline for selection of the most appropriate mitigation solution for a given design. As a support for dynamic soft error mitigation techniques, it is important to enable the online detection of energetic particles causing the soft errors. This allows activating the power-greedy fault-tolerant configurations based on N-modular redundancy only at the high radiation levels. To enable such a functionality, it is necessary to monitor both the particle flux and the variation of particle LET, as these two parameters contribute significantly to the system SER. In this work, a particle detection approach based on custom-sized pulse stretching inverters is proposed. Employing the pulse stretching inverters connected in parallel enables to measure the particle flux in terms of the number of detected SETs, while the particle LET variations can be estimated from the distribution of SET pulse widths. This approach requires a purely digital processing logic, in contrast to the standard detectors which require complex mixed-signal processing. Besides the possibility of LET monitoring, additional advantages of the proposed particle detector are low detection latency and power consumption, and immunity to error accumulation. The results achieved in this thesis can serve as a basis for establishment of an overall soft-error-aware database for a given digital library, and a comprehensive multi-level radiation-hard design flow that can be implemented with the standard IC design tools. The following step will be to evaluate the achieved results with the irradiation experiments.}, language = {en} } @phdthesis{Awad2010, author = {Awad, Ahmed Mahmoud Hany Aly}, title = {A compliance management framework for business process models}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-49222}, school = {Universit{\"a}t Potsdam}, year = {2010}, abstract = {Companies develop process models to explicitly describe their business operations. In the same time, business operations, business processes, must adhere to various types of compliance requirements. Regulations, e.g., Sarbanes Oxley Act of 2002, internal policies, best practices are just a few sources of compliance requirements. In some cases, non-adherence to compliance requirements makes the organization subject to legal punishment. In other cases, non-adherence to compliance leads to loss of competitive advantage and thus loss of market share. Unlike the classical domain-independent behavioral correctness of business processes, compliance requirements are domain-specific. Moreover, compliance requirements change over time. New requirements might appear due to change in laws and adoption of new policies. Compliance requirements are offered or enforced by different entities that have different objectives behind these requirements. Finally, compliance requirements might affect different aspects of business processes, e.g., control flow and data flow. As a result, it is infeasible to hard-code compliance checks in tools. Rather, a repeatable process of modeling compliance rules and checking them against business processes automatically is needed. This thesis provides a formal approach to support process design-time compliance checking. Using visual patterns, it is possible to model compliance requirements concerning control flow, data flow and conditional flow rules. Each pattern is mapped into a temporal logic formula. The thesis addresses the problem of consistency checking among various compliance requirements, as they might stem from divergent sources. Also, the thesis contributes to automatically check compliance requirements against process models using model checking. We show that extra domain knowledge, other than expressed in compliance rules, is needed to reach correct decisions. In case of violations, we are able to provide a useful feedback to the user. The feedback is in the form of parts of the process model whose execution causes the violation. In some cases, our approach is capable of providing automated remedy of the violation.}, language = {en} } @phdthesis{Bickel2008, author = {Bickel, Steffen}, title = {Learning under differing training and test distributions}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-33331}, school = {Universit{\"a}t Potsdam}, year = {2008}, abstract = {One of the main problems in machine learning is to train a predictive model from training data and to make predictions on test data. Most predictive models are constructed under the assumption that the training data is governed by the exact same distribution which the model will later be exposed to. In practice, control over the data collection process is often imperfect. A typical scenario is when labels are collected by questionnaires and one does not have access to the test population. For example, parts of the test population are underrepresented in the survey, out of reach, or do not return the questionnaire. In many applications training data from the test distribution are scarce because they are difficult to obtain or very expensive. Data from auxiliary sources drawn from similar distributions are often cheaply available. This thesis centers around learning under differing training and test distributions and covers several problem settings with different assumptions on the relationship between training and test distributions-including multi-task learning and learning under covariate shift and sample selection bias. Several new models are derived that directly characterize the divergence between training and test distributions, without the intermediate step of estimating training and test distributions separately. The integral part of these models are rescaling weights that match the rescaled or resampled training distribution to the test distribution. Integrated models are studied where only one optimization problem needs to be solved for learning under differing distributions. With a two-step approximation to the integrated models almost any supervised learning algorithm can be adopted to biased training data. In case studies on spam filtering, HIV therapy screening, targeted advertising, and other applications the performance of the new models is compared to state-of-the-art reference methods.}, language = {en} } @phdthesis{Blum2010, author = {Blum, Niklas}, title = {Formalization of a converged internet and telecommunications service environment}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-51146}, school = {Universit{\"a}t Potsdam}, year = {2010}, abstract = {The programmable network envisioned in the 1990s within standardization and research for the Intelligent Network is currently coming into reality using IPbased Next Generation Networks (NGN) and applying Service-Oriented Architecture (SOA) principles for service creation, execution, and hosting. SOA is the foundation for both next-generation telecommunications and middleware architectures, which are rapidly converging on top of commodity transport services. Services such as triple/quadruple play, multimedia messaging, and presence are enabled by the emerging service-oriented IPMultimedia Subsystem (IMS), and allow telecommunications service providers to maintain, if not improve, their position in the marketplace. SOA becomes the de facto standard in next-generation middleware systems as the system model of choice to interconnect service consumers and providers within and between enterprises. We leverage previous research activities in overlay networking technologies along with recent advances in network abstraction, service exposure, and service creation to develop a paradigm for a service environment providing converged Internet and Telecommunications services that we call Service Broker. Such a Service Broker provides mechanisms to combine and mediate between different service paradigms from the two domains Internet/WWW and telecommunications. Furthermore, it enables the composition of services across these domains and is capable of defining and applying temporal constraints during creation and execution time. By adding network-awareness into the service fabric, such a Service Broker may also act as a next generation network-to-service element allowing the composition of crossdomain and cross-layer network and service resources. The contribution of this research is threefold: first, we analyze and classify principles and technologies from Information Technologies (IT) and telecommunications to identify and discuss issues allowing cross-domain composition in a converging service layer. Second, we discuss service composition methods allowing the creation of converged services on an abstract level; in particular, we present a formalized method for model-checking of such compositions. Finally, we propose a Service Broker architecture converging Internet and Telecom services. This environment enables cross-domain feature interaction in services through formalized obligation policies acting as constraints during service discovery, creation, and execution time.}, language = {en} } @phdthesis{Brauer2010, author = {Brauer, Falk}, title = {Extraktion und Identifikation von Entit{\"a}ten in Textdaten im Umfeld der Enterprise Search}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-51409}, school = {Universit{\"a}t Potsdam}, year = {2010}, abstract = {Die automatische Informationsextraktion (IE) aus unstrukturierten Texten erm{\"o}glicht v{\"o}llig neue Wege, auf relevante Informationen zuzugreifen und deren Inhalte zu analysieren, die weit {\"u}ber bisherige Verfahren zur Stichwort-basierten Dokumentsuche hinausgehen. Die Entwicklung von Programmen zur Extraktion von maschinenlesbaren Daten aus Texten erfordert jedoch nach wie vor die Entwicklung von dom{\"a}nenspezifischen Extraktionsprogrammen. Insbesondere im Bereich der Enterprise Search (der Informationssuche im Unternehmensumfeld), in dem eine große Menge von heterogenen Dokumenttypen existiert, ist es oft notwendig ad-hoc Programm-module zur Extraktion von gesch{\"a}ftsrelevanten Entit{\"a}ten zu entwickeln, die mit generischen Modulen in monolithischen IE-Systemen kombiniert werden. Dieser Umstand ist insbesondere kritisch, da potentiell f{\"u}r jeden einzelnen Anwendungsfall ein von Grund auf neues IE-System entwickelt werden muss. Die vorliegende Dissertation untersucht die effiziente Entwicklung und Ausf{\"u}hrung von IE-Systemen im Kontext der Enterprise Search und effektive Methoden zur Ausnutzung bekannter strukturierter Daten im Unternehmenskontext f{\"u}r die Extraktion und Identifikation von gesch{\"a}ftsrelevanten Entit{\"a}ten in Doku-menten. Grundlage der Arbeit ist eine neuartige Plattform zur Komposition von IE-Systemen auf Basis der Beschreibung des Datenflusses zwischen generischen und anwendungsspezifischen IE-Modulen. Die Plattform unterst{\"u}tzt insbesondere die Entwicklung und Wiederverwendung von generischen IE-Modulen und zeichnet sich durch eine h{\"o}here Flexibilit{\"a}t und Ausdrucksm{\"a}chtigkeit im Vergleich zu vorherigen Methoden aus. Ein in der Dissertation entwickeltes Verfahren zur Dokumentverarbeitung interpretiert den Daten-austausch zwischen IE-Modulen als Datenstr{\"o}me und erm{\"o}glicht damit eine weitgehende Parallelisierung von einzelnen Modulen. Die autonome Ausf{\"u}hrung der Module f{\"u}hrt zu einer wesentlichen Beschleu-nigung der Verarbeitung von Einzeldokumenten und verbesserten Antwortzeiten, z. B. f{\"u}r Extraktions-dienste. Bisherige Ans{\"a}tze untersuchen lediglich die Steigerung des durchschnittlichen Dokumenten-durchsatzes durch verteilte Ausf{\"u}hrung von Instanzen eines IE-Systems. Die Informationsextraktion im Kontext der Enterprise Search unterscheidet sich z. B. von der Extraktion aus dem World Wide Web dadurch, dass in der Regel strukturierte Referenzdaten z. B. in Form von Unternehmensdatenbanken oder Terminologien zur Verf{\"u}gung stehen, die oft auch die Beziehungen von Entit{\"a}ten beschreiben. Entit{\"a}ten im Unternehmensumfeld haben weiterhin bestimmte Charakteristiken: Eine Klasse von relevanten Entit{\"a}ten folgt bestimmten Bildungsvorschriften, die nicht immer bekannt sind, auf die aber mit Hilfe von bekannten Beispielentit{\"a}ten geschlossen werden kann, so dass unbekannte Entit{\"a}ten extrahiert werden k{\"o}nnen. Die Bezeichner der anderen Klasse von Entit{\"a}ten haben eher umschreibenden Charakter. Die korrespondierenden Umschreibungen in Texten k{\"o}nnen variieren, wodurch eine Identifikation derartiger Entit{\"a}ten oft erschwert wird. Zur effizienteren Entwicklung von IE-Systemen wird in der Dissertation ein Verfahren untersucht, das alleine anhand von Beispielentit{\"a}ten effektive Regul{\"a}re Ausdr{\"u}cke zur Extraktion von unbekannten Entit{\"a}ten erlernt und damit den manuellen Aufwand in derartigen Anwendungsf{\"a}llen minimiert. Verschiedene Generalisierungs- und Spezialisierungsheuristiken erkennen Muster auf verschiedenen Abstraktionsebenen und schaffen dadurch einen Ausgleich zwischen Genauigkeit und Vollst{\"a}ndigkeit bei der Extraktion. Bekannte Regellernverfahren im Bereich der Informationsextraktion unterst{\"u}tzen die beschriebenen Problemstellungen nicht, sondern ben{\"o}tigen einen (annotierten) Dokumentenkorpus. Eine Methode zur Identifikation von Entit{\"a}ten, die durch Graph-strukturierte Referenzdaten vordefiniert sind, wird als dritter Schwerpunkt untersucht. Es werden Verfahren konzipiert, welche {\"u}ber einen exakten Zeichenkettenvergleich zwischen Text und Referenzdatensatz hinausgehen und Teil{\"u}bereinstimmungen und Beziehungen zwischen Entit{\"a}ten zur Identifikation und Disambiguierung heranziehen. Das in der Arbeit vorgestellte Verfahren ist bisherigen Ans{\"a}tzen hinsichtlich der Genauigkeit und Vollst{\"a}ndigkeit bei der Identifikation {\"u}berlegen.}, language = {de} } @phdthesis{Brueckner2012, author = {Br{\"u}ckner, Michael}, title = {Prediction games : machine learning in the presence of an adversary}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-203-2}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-60375}, school = {Universit{\"a}t Potsdam}, pages = {x, 121}, year = {2012}, abstract = {In many applications one is faced with the problem of inferring some functional relation between input and output variables from given data. Consider, for instance, the task of email spam filtering where one seeks to find a model which automatically assigns new, previously unseen emails to class spam or non-spam. Building such a predictive model based on observed training inputs (e.g., emails) with corresponding outputs (e.g., spam labels) is a major goal of machine learning. Many learning methods assume that these training data are governed by the same distribution as the test data which the predictive model will be exposed to at application time. That assumption is violated when the test data are generated in response to the presence of a predictive model. This becomes apparent, for instance, in the above example of email spam filtering. Here, email service providers employ spam filters and spam senders engineer campaign templates such as to achieve a high rate of successful deliveries despite any filters. Most of the existing work casts such situations as learning robust models which are unsusceptible against small changes of the data generation process. The models are constructed under the worst-case assumption that these changes are performed such to produce the highest possible adverse effect on the performance of the predictive model. However, this approach is not capable to realistically model the true dependency between the model-building process and the process of generating future data. We therefore establish the concept of prediction games: We model the interaction between a learner, who builds the predictive model, and a data generator, who controls the process of data generation, as an one-shot game. The game-theoretic framework enables us to explicitly model the players' interests, their possible actions, their level of knowledge about each other, and the order at which they decide for an action. We model the players' interests as minimizing their own cost function which both depend on both players' actions. The learner's action is to choose the model parameters and the data generator's action is to perturbate the training data which reflects the modification of the data generation process with respect to the past data. We extensively study three instances of prediction games which differ regarding the order in which the players decide for their action. We first assume that both player choose their actions simultaneously, that is, without the knowledge of their opponent's decision. We identify conditions under which this Nash prediction game has a meaningful solution, that is, a unique Nash equilibrium, and derive algorithms that find the equilibrial prediction model. As a second case, we consider a data generator who is potentially fully informed about the move of the learner. This setting establishes a Stackelberg competition. We derive a relaxed optimization criterion to determine the solution of this game and show that this Stackelberg prediction game generalizes existing prediction models. Finally, we study the setting where the learner observes the data generator's action, that is, the (unlabeled) test data, before building the predictive model. As the test data and the training data may be governed by differing probability distributions, this scenario reduces to learning under covariate shift. We derive a new integrated as well as a two-stage method to account for this data set shift. In case studies on email spam filtering we empirically explore properties of all derived models as well as several existing baseline methods. We show that spam filters resulting from the Nash prediction game as well as the Stackelberg prediction game in the majority of cases outperform other existing baseline methods.}, language = {en} }