% Bibliography records (all dated 2010), one field per line.
% Conventions used below:
%   - accents are written as LaTeX escapes using the ASCII apostrophe or
%     double quote inside a brace group
%   - page ranges use a double hyphen with no surrounding spaces
%   - words that must keep their capitalisation in titles are brace-protected
%   - each field name appears at most once per entry; in the book records the
%     issuing university is stored in "institution" instead of a second
%     "publisher" field
% NOTE(review): the inproceedings records carry no booktitle, which standard
% styles report as a missing required field. Add it once the proceedings
% title has been confirmed against the source repository.
% NOTE(review): the abstract of KehrerKelter2010 does not appear to match its
% title; verify against the source repository.

@phdthesis{Awad2010,
  author      = {Awad, Ahmed Mahmoud Hany Aly},
  title       = {A compliance management framework for business process models},
  url         = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-49222},
  school      = {Universit{\"a}t Potsdam},
  year        = {2010},
  abstract    = {Companies develop process models to explicitly describe their business operations. In the same time, business operations, business processes, must adhere to various types of compliance requirements. Regulations, e.g., Sarbanes Oxley Act of 2002, internal policies, best practices are just a few sources of compliance requirements. In some cases, non-adherence to compliance requirements makes the organization subject to legal punishment. In other cases, non-adherence to compliance leads to loss of competitive advantage and thus loss of market share. Unlike the classical domain-independent behavioral correctness of business processes, compliance requirements are domain-specific. Moreover, compliance requirements change over time. New requirements might appear due to change in laws and adoption of new policies. Compliance requirements are offered or enforced by different entities that have different objectives behind these requirements. Finally, compliance requirements might affect different aspects of business processes, e.g., control flow and data flow. As a result, it is infeasible to hard-code compliance checks in tools. Rather, a repeatable process of modeling compliance rules and checking them against business processes automatically is needed. This thesis provides a formal approach to support process design-time compliance checking. Using visual patterns, it is possible to model compliance requirements concerning control flow, data flow and conditional flow rules. Each pattern is mapped into a temporal logic formula. The thesis addresses the problem of consistency checking among various compliance requirements, as they might stem from divergent sources. Also, the thesis contributes to automatically check compliance requirements against process models using model checking. We show that extra domain knowledge, other than expressed in compliance rules, is needed to reach correct decisions. In case of violations, we are able to provide a useful feedback to the user. The feedback is in the form of parts of the process model whose execution causes the violation. In some cases, our approach is capable of providing automated remedy of the violation.},
  language    = {en},
}

@inproceedings{HerreHummel2010,
  author      = {Herre, Heinrich and Hummel, Axel},
  title       = {A paraconsistent semantics for generalized logic programs},
  url         = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-41496},
  year        = {2010},
  abstract    = {We propose a paraconsistent declarative semantics of possibly inconsistent generalized logic programs which allows for arbitrary formulas in the body and in the head of a rule (i.e. does not depend on the presence of any specific connective, such as negation(-as-failure), nor on any specific syntax of rules). For consistent generalized logic programs this semantics coincides with the stable generated models introduced in [HW97], and for normal logic programs it yields the stable models in the sense of [GL88].},
  language    = {en},
}

@inproceedings{GoltzPieth2010,
  author      = {Goltz, Hans-Joachim and Pieth, Norbert},
  title       = {A tool for generating partition schedules of multiprocessor systems},
  url         = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-41556},
  year        = {2010},
  abstract    = {A deterministic cycle scheduling of partitions at the operating system level is supposed for a multiprocessor system. In this paper, we propose a tool for generating such schedules. We use constraint based programming and develop methods and concepts for a combined interactive and automatic partition scheduling system. This paper is also devoted to basic methods and techniques for modeling and solving this partition scheduling problem. Initial application of our partition scheduling tool has proved successful and demonstrated the suitability of the methods used.},
  language    = {en},
}

@inproceedings{HanusKoschnicke2010,
  author      = {Hanus, Michael and Koschnicke, Sven},
  title       = {An {ER}-based framework for declarative web programming},
  url         = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-41447},
  year        = {2010},
  abstract    = {We describe a framework to support the implementation of web-based systems to manipulate data stored in relational databases. Since the conceptual model of a relational database is often specified as an entity-relationship (ER) model, we propose to use the ER model to generate a complete implementation in the declarative programming language Curry. This implementation contains operations to create and manipulate entities of the data model, supports authentication, authorization, session handling, and the composition of individual operations to user processes. Furthermore and most important, the implementation ensures the consistency of the database w.r.t. the data dependencies specified in the ER model, i.e., updates initiated by the user cannot lead to an inconsistent state of the database. In order to generate a high-level declarative implementation that can be easily adapted to individual customer requirements, the framework exploits previous works on declarative database programming and web user interface construction in Curry.},
  language    = {en},
}

@article{Kiss2010,
  author      = {Kiss, G{\'a}bor},
  title       = {Analyse der {Studienleistungen} von {Studierenden} an der {Universit{\"a}t} {{\'O}buda} und deren {Implikationen} f{\"u}r die {Informatikausbildung}},
  series      = {Commentarii informaticae didacticae : (CID)},
  journal     = {Commentarii informaticae didacticae : (CID)},
  number      = {4},
  publisher   = {Universit{\"a}tsverlag Potsdam},
  address     = {Potsdam},
  issn        = {1868-0844},
  url         = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-64364},
  pages       = {71--77},
  year        = {2010},
  abstract    = {In der letzten Jahren ist die Zahl der erfolgreichen Pr{\"u}fungen von Studierenden im Informatikkurs des ersten Studienjahres f{\"u}r verschiedene Studieng{\"a}nge an der Universit{\"a}t {\'O}buda stark gesunken. Dies betrifft Pr{\"u}fungen in den Teilgebieten Rechnerarchitektur, Betrieb von Peripherieger{\"a}ten, Bin{\"a}re Codierung und logische Operationen, Computerviren, Computernetze und das Internet, Steganographie und Kryptographie, Betriebsysteme. Mehr als der H{\"a}lfte der Studenten konnte die Pr{\"u}fungen der ersten Semester nicht erfolgreich absolvieren. Die hier vorgelegte Analyse der Studienleistungen zielt darauf ab, Gr{\"u}nde f{\"u}r diese Entwicklung zu identifizieren, die Zahl der Abbrecher zu reduzieren und die Leistungen der Studenten zu verbessern. Die Analyse zeigt, dass die Studenten die erforderlichen Lehrmaterialen erst ein bis zwei Tage vor oder sogar erst am Tag der Klausuren vom Server downloaden, so dass sie nicht mehr hinreichend Zeit zum Lernen haben. Diese Tendenz zeigt sich bei allen Teilgebieten des Studiengangs. Ein Mangel an kontinuierlicher Mitarbeit scheint einer der Gr{\"u}nde f{\"u}r ein fr{\"u}hes Scheitern zu sein. Ferner zeigt sich die Notwendigkeit, dass bei den Lehrangeboten in Informatik auf eine kontinuierliche Kommunikation mit den Studierenden und R{\"u}ckmeldung zu aktuellen Unterrichtsinhalten zu achten ist. Dies kann durch motivierende Maßnahmen zur Teilnahme an den {\"U}bungen oder durch kleine w{\"o}chentliche schriftliche Tests geschehen.},
  language    = {de},
}

@article{Raimer2010,
  author      = {Raimer, Stephan},
  title       = {Aquadrohne, {Messdatenerfassung} und {Co.}},
  series      = {Commentarii informaticae didacticae : (CID)},
  journal     = {Commentarii informaticae didacticae : (CID)},
  number      = {4},
  publisher   = {Universit{\"a}tsverlag Potsdam},
  address     = {Potsdam},
  issn        = {1868-0844},
  url         = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-64345},
  pages       = {59--64},
  year        = {2010},
  abstract    = {Projektmanagement-Kompetenzen werden von Unternehmen unterschiedlichster Branchen mit wachsender Priorit{\"a}t betrachtet und eingefordert. Als Beitrag zu einer kompetenzorientierten Ausbildung werden in diesem Paper interdisziplin{\"a}re Studienmodule als Bestandteil des Wirtschaftsinformatik-Studiums vorgestellt. Zielsetzung der Studienmodule ist die Bef{\"a}higung der Studierenden, konkrete Projekte unter Nutzung von standardisierten Werkzeugen und Methoden nach dem IPMA-Standard planen und durchf{\"u}hren zu k{\"o}nnen.},
  language    = {de},
}

@phdthesis{Ishebabi2010,
  author      = {Ishebabi, Harold},
  title       = {Architecture synthesis for adaptive multiprocessor systems on chip},
  url         = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-41316},
  school      = {Universit{\"a}t Potsdam},
  year        = {2010},
  abstract    = {This thesis presents methods for automated synthesis of flexible chip multiprocessor systems from parallel programs targeted at FPGAs to exploit both task-level parallelism and architecture customization. Automated synthesis is necessitated by the complexity of the design space. A detailed description of the design space is provided in order to determine which parameters should be modeled to facilitate automated synthesis by optimizing a cost function, the emphasis being placed on inclusive modeling of parameters from application, architectural and physical subspaces, as well as their joint coverage in order to avoid pre-constraining the design space. Given a parallel program and a set of an IP library, the automated synthesis problem is to simultaneously (i) select processors (ii) map and schedule tasks to them, and (iii) select one or several networks for inter-task communications such that design constraints and optimization objectives are met. The research objective in this thesis is to find a suitable model for automated synthesis, and to evaluate methods of using the model for architectural optimizations. Our contributions are a holistic approach for the design of such systems, corresponding models to facilitate automated synthesis, evaluation of optimization methods using state of the art integer linear and answer set programming, as well as the development of synthesis heuristics to solve runtime challenges.},
  language    = {en},
}

@inproceedings{FanMasuharaAotanietal.2010,
  author      = {Fan, Yang and Masuhara, Hidehiko and Aotani, Tomoyuki and Nielson, Flemming and Nielson, Hanne Riis},
  title       = {{AspectKE*}: Security aspects with program analysis for distributed systems},
  url         = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-41369},
  year        = {2010},
  abstract    = {Enforcing security policies to distributed systems is difficult, in particular, when a system contains untrusted components. We designed AspectKE*, a distributed AOP language based on a tuple space, to tackle this issue. In AspectKE*, aspects can enforce access control policies that depend on future behavior of running processes. One of the key language features is the predicates and functions that extract results of static program analysis, which are useful for defining security aspects that have to know about future behavior of a program. AspectKE* also provides a novel variable binding mechanism for pointcuts, so that pointcuts can uniformly specify join points based on both static and dynamic information about the program. Our implementation strategy performs fundamental static analysis at load-time, so as to retain runtime overheads minimal. We implemented a compiler for AspectKE*, and demonstrate usefulness of AspectKE* through a security aspect for a distributed chat system.},
  language    = {en},
}

@misc{RepsilberKernTelaaretal.2010,
  author      = {Repsilber, Dirk and Kern, Sabine and Telaar, Anna and Walzl, Gerhard and Black, Gillian F. and Selbig, Joachim and Parida, Shreemanta K. and Kaufmann, Stefan H. E. and Jacobsen, Marc},
  title       = {Biomarker discovery in heterogeneous tissue samples},
  series      = {Postprints der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe},
  journal     = {Postprints der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe},
  number      = {854},
  issn        = {1866-8372},
  doi         = {10.25932/publishup-42934},
  url         = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-429343},
  pages       = {17},
  year        = {2010},
  abstract    = {Background: For heterogeneous tissues, such as blood, measurements of gene expression are confounded by relative proportions of cell types involved. Conclusions have to rely on estimation of gene expression signals for homogeneous cell populations, e.g. by applying micro-dissection, fluorescence activated cell sorting, or in-silico deconfounding. We studied feasibility and validity of a non-negative matrix decomposition algorithm using experimental gene expression data for blood and sorted cells from the same donor samples. Our objective was to optimize the algorithm regarding detection of differentially expressed genes and to enable its use for classification in the difficult scenario of reversely regulated genes. This would be of importance for the identification of candidate biomarkers in heterogeneous tissues. Results: Experimental data and simulation studies involving noise parameters estimated from these data revealed that for valid detection of differential gene expression, quantile normalization and use of non-log data are optimal. We demonstrate the feasibility of predicting proportions of constituting cell types from gene expression data of single samples, as a prerequisite for a deconfounding-based classification approach. Classification cross-validation errors with and without using deconfounding results are reported as well as sample-size dependencies. Implementation of the algorithm, simulation and analysis scripts are available. Conclusions: The deconfounding algorithm without decorrelation using quantile normalization on non-log data is proposed for biomarkers that are difficult to detect, and for cases where confounding by varying proportions of cell types is the suspected reason. In this case, a deconfounding ranking approach can be used as a powerful alternative to, or complement of, other statistical learning approaches to define candidate biomarkers for molecular diagnosis and prediction in biomedicine, in realistically noisy conditions and with moderate sample sizes.},
  language    = {en},
}

@book{SmirnovReijersNugterenetal.2010,
  author      = {Smirnov, Sergey and Reijers, Hajo A. and Nugteren, Thijs and Weske, Mathias},
  title       = {Business process model abstraction : theory and practice},
  publisher   = {Universit{\"a}tsverlag Potsdam},
  address     = {Potsdam},
  isbn        = {978-3-86956-054-0},
  url         = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-41782},
  institution = {Universit{\"a}t Potsdam},
  pages       = {17},
  year        = {2010},
  abstract    = {Business process management aims at capturing, understanding, and improving work in organizations. The central artifacts are process models, which serve different purposes. Detailed process models are used to analyze concrete working procedures, while high-level models show, for instance, handovers between departments. To provide different views on process models, business process model abstraction has emerged. While several approaches have been proposed, a number of abstraction use case that are both relevant for industry and scientifically challenging are yet to be addressed. In this paper we systematically develop, classify, and consolidate different use cases for business process model abstraction. The reported work is based on a study with BPM users in the health insurance sector and validated with a BPM consultancy company and a large BPM vendor. The identified fifteen abstraction use cases reflect the industry demand. The related work on business process model abstraction is evaluated against the use cases, which leads to a research agenda.},
  language    = {en},
}

@inproceedings{BandaGallagher2010,
  author      = {Banda, Gourinath and Gallagher, John P.},
  title       = {Constraint-based abstraction of a model checker for infinite state systems},
  url         = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-41516},
  year        = {2010},
  abstract    = {Abstract interpretation-based model checking provides an approach to verifying properties of infinite-state systems. In practice, most previous work on abstract model checking is either restricted to verifying universal properties, or develops special techniques for temporal logics such as modal transition systems or other dual transition systems. By contrast we apply completely standard techniques for constructing abstract interpretations to the abstraction of a CTL semantic function, without restricting the kind of properties that can be verified. Furthermore we show that this leads directly to implementation of abstract model checking algorithms for abstract domains based on constraints, making use of an SMT solver.},
  language    = {en},
}

@book{OPUS4-4643,
  title       = {Dritter {Deutscher} {IPv6} {Gipfel} 2010},
  editor      = {Meinel, Christoph and Sack, Harald},
  publisher   = {Universit{\"a}tsverlag Potsdam},
  address     = {Potsdam},
  isbn        = {978-3-86956-092-2},
  url         = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-46134},
  institution = {Universit{\"a}t Potsdam},
  pages       = {112},
  year        = {2010},
  abstract    = {Am 24. und 25. Juni 2010 fand am Hasso-Plattner-Institut f{\"u}r Softwaresystemtechnik GmbH in Potsdam der 3. Deutsche IPv6 Gipfel 2010 statt, dessen Dokumentation der vorliegende technische Report dient. Als nationaler Arm des weltweiten IPv6-Forums f{\"o}rdert der Deutsche IPv6-Rat den {\"U}bergangsprozess zur neuen Internetgeneration und brachte in diesem Rahmen nationale und internationale Experten aus Wirtschaft, Wissenschaft und {\"o}ffentlicher Verwaltung zusammen, um Awareness f{\"u}r das Zukunftsthema IPv6 zu schaffen und um ein Resum{\'e} {\"u}ber die bislang erzielten Fortschritte zu ziehen. Die Grenzen des alten Internetprotokolls IPv4 sind in den vergangenen zwei Jahren deutlicher denn je zutage getreten. Waren im vergangenen Jahr anl{\"a}sslich des 2. IPv6 Gipfels noch 11\% aller zu vergebenden IPv4 Adressen verf{\"u}gbar, ist diese Zahl mittlerweile auf nur noch 6\% geschrumpft. Ehrengast war in diesem Jahr der „europ{\"a}ische Vater" des Internets, Prof. Peter T. Kirstein vom University College London, dessen Hauptvortrag von weiteren Beitr{\"a}gen hochrangiger Vertretern aus Politik, Wissenschaft und Wirtschaft erg{\"a}nzt wurde.},
  language    = {mul},
}

@inproceedings{GeskeGoltz2010,
  author      = {Geske, Ulrich and Goltz, Hans-Joachim},
  title       = {Efficiency of difference-list programming},
  url         = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-41563},
  year        = {2010},
  abstract    = {The difference-list technique is described in literature as effective method for extending lists to the right without using calls of append/3. There exist some proposals for automatic transformation of list programs into differencelist programs. However, we are interested in construction of difference-list programs by the programmer, avoiding the need of a transformation step. In [GG09] it was demonstrated, how left-recursive procedures with a dangling call of append/3 can be transformed into right-recursion using the unfolding technique. For simplification of writing difference-list programs using a new cons/2 procedure was introduced. In the present paper, we investigate how efficieny is influenced using cons/2. We measure the efficiency of procedures using accumulator technique, cons/2, DCG's, and difference lists and compute the resulting speedup in respect to the simple procedure definition using append/3. Four Prolog systems were investigated and we found different behaviour concerning the speedup by difference lists. A result of our investigations is, that an often advice given in the literature for avoiding calls append/3 could not be confirmed in this strong formulation.},
  language    = {en},
}

@book{BauckmannLeserNaumann2010,
  author      = {Bauckmann, Jana and Leser, Ulf and Naumann, Felix},
  title       = {Efficient and exact computation of inclusion dependencies for data integration},
  publisher   = {Universit{\"a}tsverlag Potsdam},
  address     = {Potsdam},
  isbn        = {978-3-86956-048-9},
  url         = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-41396},
  institution = {Universit{\"a}t Potsdam},
  pages       = {36},
  year        = {2010},
  abstract    = {Data obtained from foreign data sources often come with only superficial structural information, such as relation names and attribute names. Other types of metadata that are important for effective integration and meaningful querying of such data sets are missing. In particular, relationships among attributes, such as foreign keys, are crucial metadata for understanding the structure of an unknown database. The discovery of such relationships is difficult, because in principle for each pair of attributes in the database each pair of data values must be compared. A precondition for a foreign key is an inclusion dependency (IND) between the key and the foreign key attributes. We present with Spider an algorithm that efficiently finds all INDs in a given relational database. It leverages the sorting facilities of DBMS but performs the actual comparisons outside of the database to save computation. Spider analyzes very large databases up to an order of magnitude faster than previous approaches. We also evaluate in detail the effectiveness of several heuristics to reduce the number of necessary comparisons. Furthermore, we generalize Spider to find composite INDs covering multiple attributes, and partial INDs, which are true INDs for all but a certain number of values. This last type is particularly relevant when integrating dirty data as is often the case in the life sciences domain - our driving motivation.},
  language    = {en},
}

@article{DuennebierDiethelm2010,
  author      = {D{\"u}nnebier, Malte and Diethelm, Ira},
  title       = {Ein virtueller {Lernraum} f{\"u}r die {Informatiklehrerweiterbildung}},
  series      = {Commentarii informaticae didacticae : (CID)},
  journal     = {Commentarii informaticae didacticae : (CID)},
  number      = {4},
  publisher   = {Universit{\"a}tsverlag Potsdam},
  address     = {Potsdam},
  issn        = {1868-0844},
  url         = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-64359},
  pages       = {65--70},
  year        = {2010},
  abstract    = {Bei der Suche nach M{\"o}glichkeiten, die Weiterbildung f{\"u}r Informatiklehrkr{\"a}fte auszubauen, bietet sich der Einsatz virtueller Lernr{\"a}ume an. Dieses Papier berichtet {\"u}ber ein Projekt, in dem ein exemplarischer virtueller Lernraum f{\"u}r kollaboratives Lernen in der Lehrerweiterbildung in Informatik theoriegeleitet erstellt, erprobt und bewertet wurde. Die erzielten Ergebnisse {\"u}ber das Nutzungsverhalten k{\"o}nnen f{\"u}r weitere E-Learningprojekte in der Lehrerbildung hilfreich sein. Der Schwerpunkt dieses Papiers liegt auf der Gestaltung des Lernraums unter Beachtung der speziellen Situation der Informatiklehrkr{\"a}fte, nicht auf der didaktischen Aufbereitung der betreffenden Lerneinheit.},
  language    = {de},
}

@article{KehrerKelter2010,
  author      = {Kehrer, Timo and Kelter, Udo},
  title       = {Eine aufwandsbeschr{\"a}nkte {Einf{\"u}hrung} in die modellbasierte {Softwareentwicklung}},
  series      = {Commentarii informaticae didacticae : (CID)},
  journal     = {Commentarii informaticae didacticae : (CID)},
  number      = {4},
  publisher   = {Universit{\"a}tsverlag Potsdam},
  address     = {Potsdam},
  issn        = {1868-0844},
  url         = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-64304},
  pages       = {23--33},
  year        = {2010},
  abstract    = {Zusammenfassung: Game-based Learning und Edutainment sind aktuelle Schlagworte im Bereich der Hochschulausbildung. Zun{\"a}chst verbindet man damit die Integration einer Spiel- und Spaßkultur in die herk{\"o}mmlichen Lehrveranstaltungen wie Vorlesungen, {\"U}bungen, Praktika und Seminare. Die nachfolgenden Ausf{\"u}hrungen gehen einer genaueren Begriffsanalyse nach und untersuchen, ob Game-based Learning und Edutainment tats{\"a}chlich neuartige Unterrichtsformen erfordern oder neue didaktische {\"U}berlegungen in bestehendes Unterrichtsgeschehen bringen - oder ob es nicht doch an einigen Stellen „alter Wein in neuen Schl{\"a}uchen" ist.},
  language    = {de},
}

@misc{MargariaSteffenKubczak2010,
  author      = {Margaria, Tiziana and Steffen, Bernhard and Kubczak, Christian},
  title       = {Evolution support in heterogeneous service-oriented landscapes},
  series      = {Postprints der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe},
  journal     = {Postprints der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe},
  number      = {918},
  issn        = {1866-8372},
  doi         = {10.25932/publishup-43240},
  url         = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-432405},
  pages       = {15},
  year        = {2010},
  abstract    = {We present an approach that provides automatic or semi-automatic support for evolution and change management in heterogeneous legacy landscapes where (1) legacy heterogeneous, possibly distributed platforms are integrated in a service oriented fashion, (2) the coordination of functionality is provided at the service level, through orchestration, (3) compliance and correctness are provided through policies and business rules, (4) evolution and correctness-by-design are supported by the eXtreme Model Driven Development paradigm (XMDD) offered by the jABC (Margaria and Steffen in Annu. Rev. Commun. 57, 2004)—the model-driven service oriented development platform we use here for integration, design, evolution, and governance. The artifacts are here semantically enriched, so that automatic synthesis plugins can field the vision of Enterprise Physics: knowledge driven business process development for the end user. We demonstrate this vision along a concrete case study that became over the past three years a benchmark for Semantic Web Service discovery and mediation. We enhance the Mediation Scenario of the Semantic Web Service Challenge along the 2 central evolution paradigms that occur in practice: (a) Platform migration: platform substitution of a legacy system by an ERP system and (b) Backend extension: extension of the legacy Customer Relationship Management (CRM) and Order Management System (OMS) backends via an additional ERP layer.},
  language    = {en},
}

@inproceedings{Cabalar2010,
  author      = {Cabalar, Pedro},
  title       = {Existential quantifiers in the rule body},
  url         = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-41476},
  year        = {2010},
  abstract    = {In this paper we consider a simple syntactic extension of Answer Set Programming (ASP) for dealing with (nested) existential quantifiers and double negation in the rule bodies, in a close way to the recent proposal RASPL-1. The semantics for this extension just resorts to Equilibrium Logic (or, equivalently, to the General Theory of Stable Models), which provides a logic-programming interpretation for any arbitrary theory in the syntax of Predicate Calculus. We present a translation of this syntactic class into standard logic programs with variables (either disjunctive or normal, depending on the input rule heads), as those allowed by current ASP solvers. The translation relies on the introduction of auxiliary predicates and the main result shows that it preserves strong equivalence modulo the original signature.},
  language    = {en},
}

@book{LangeBoehmNaumann2010,
  author      = {Lange, Dustin and B{\"o}hm, Christoph and Naumann, Felix},
  title       = {Extracting structured information from {Wikipedia} articles to populate infoboxes},
  publisher   = {Universit{\"a}tsverlag Potsdam},
  address     = {Potsdam},
  isbn        = {978-3-86956-081-6},
  url         = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-45714},
  institution = {Universit{\"a}t Potsdam},
  pages       = {27},
  year        = {2010},
  abstract    = {Roughly every third Wikipedia article contains an infobox - a table that displays important facts about the subject in attribute-value form. The schema of an infobox, i.e., the attributes that can be expressed for a concept, is defined by an infobox template. Often, authors do not specify all template attributes, resulting in incomplete infoboxes. With iPopulator, we introduce a system that automatically populates infoboxes of Wikipedia articles by extracting attribute values from the article's text. In contrast to prior work, iPopulator detects and exploits the structure of attribute values for independently extracting value parts. We have tested iPopulator on the entire set of infobox templates and provide a detailed analysis of its effectiveness. For instance, we achieve an average extraction precision of 91\% for 1,727 distinct infobox template attributes.},
  language    = {en},
}

@phdthesis{Brauer2010,
  author      = {Brauer, Falk},
  title       = {Extraktion und {Identifikation} von {Entit{\"a}ten} in {Textdaten} im {Umfeld} der {Enterprise} {Search}},
  url         = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-51409},
  school      = {Universit{\"a}t Potsdam},
  year        = {2010},
  abstract    = {Die automatische Informationsextraktion (IE) aus unstrukturierten Texten erm{\"o}glicht v{\"o}llig neue Wege, auf relevante Informationen zuzugreifen und deren Inhalte zu analysieren, die weit {\"u}ber bisherige Verfahren zur Stichwort-basierten Dokumentsuche hinausgehen. Die Entwicklung von Programmen zur Extraktion von maschinenlesbaren Daten aus Texten erfordert jedoch nach wie vor die Entwicklung von dom{\"a}nenspezifischen Extraktionsprogrammen. Insbesondere im Bereich der Enterprise Search (der Informationssuche im Unternehmensumfeld), in dem eine große Menge von heterogenen Dokumenttypen existiert, ist es oft notwendig ad-hoc Programm-module zur Extraktion von gesch{\"a}ftsrelevanten Entit{\"a}ten zu entwickeln, die mit generischen Modulen in monolithischen IE-Systemen kombiniert werden. Dieser Umstand ist insbesondere kritisch, da potentiell f{\"u}r jeden einzelnen Anwendungsfall ein von Grund auf neues IE-System entwickelt werden muss. Die vorliegende Dissertation untersucht die effiziente Entwicklung und Ausf{\"u}hrung von IE-Systemen im Kontext der Enterprise Search und effektive Methoden zur Ausnutzung bekannter strukturierter Daten im Unternehmenskontext f{\"u}r die Extraktion und Identifikation von gesch{\"a}ftsrelevanten Entit{\"a}ten in Doku-menten. Grundlage der Arbeit ist eine neuartige Plattform zur Komposition von IE-Systemen auf Basis der Beschreibung des Datenflusses zwischen generischen und anwendungsspezifischen IE-Modulen. Die Plattform unterst{\"u}tzt insbesondere die Entwicklung und Wiederverwendung von generischen IE-Modulen und zeichnet sich durch eine h{\"o}here Flexibilit{\"a}t und Ausdrucksm{\"a}chtigkeit im Vergleich zu vorherigen Methoden aus. Ein in der Dissertation entwickeltes Verfahren zur Dokumentverarbeitung interpretiert den Daten-austausch zwischen IE-Modulen als Datenstr{\"o}me und erm{\"o}glicht damit eine weitgehende Parallelisierung von einzelnen Modulen. Die autonome Ausf{\"u}hrung der Module f{\"u}hrt zu einer wesentlichen Beschleu-nigung der Verarbeitung von Einzeldokumenten und verbesserten Antwortzeiten, z. B. f{\"u}r Extraktions-dienste. Bisherige Ans{\"a}tze untersuchen lediglich die Steigerung des durchschnittlichen Dokumenten-durchsatzes durch verteilte Ausf{\"u}hrung von Instanzen eines IE-Systems. Die Informationsextraktion im Kontext der Enterprise Search unterscheidet sich z. B. von der Extraktion aus dem World Wide Web dadurch, dass in der Regel strukturierte Referenzdaten z. B. in Form von Unternehmensdatenbanken oder Terminologien zur Verf{\"u}gung stehen, die oft auch die Beziehungen von Entit{\"a}ten beschreiben. Entit{\"a}ten im Unternehmensumfeld haben weiterhin bestimmte Charakteristiken: Eine Klasse von relevanten Entit{\"a}ten folgt bestimmten Bildungsvorschriften, die nicht immer bekannt sind, auf die aber mit Hilfe von bekannten Beispielentit{\"a}ten geschlossen werden kann, so dass unbekannte Entit{\"a}ten extrahiert werden k{\"o}nnen. Die Bezeichner der anderen Klasse von Entit{\"a}ten haben eher umschreibenden Charakter. Die korrespondierenden Umschreibungen in Texten k{\"o}nnen variieren, wodurch eine Identifikation derartiger Entit{\"a}ten oft erschwert wird. Zur effizienteren Entwicklung von IE-Systemen wird in der Dissertation ein Verfahren untersucht, das alleine anhand von Beispielentit{\"a}ten effektive Regul{\"a}re Ausdr{\"u}cke zur Extraktion von unbekannten Entit{\"a}ten erlernt und damit den manuellen Aufwand in derartigen Anwendungsf{\"a}llen minimiert. Verschiedene Generalisierungs- und Spezialisierungsheuristiken erkennen Muster auf verschiedenen Abstraktionsebenen und schaffen dadurch einen Ausgleich zwischen Genauigkeit und Vollst{\"a}ndigkeit bei der Extraktion. Bekannte Regellernverfahren im Bereich der Informationsextraktion unterst{\"u}tzen die beschriebenen Problemstellungen nicht, sondern ben{\"o}tigen einen (annotierten) Dokumentenkorpus. Eine Methode zur Identifikation von Entit{\"a}ten, die durch Graph-strukturierte Referenzdaten vordefiniert sind, wird als dritter Schwerpunkt untersucht. Es werden Verfahren konzipiert, welche {\"u}ber einen exakten Zeichenkettenvergleich zwischen Text und Referenzdatensatz hinausgehen und Teil{\"u}bereinstimmungen und Beziehungen zwischen Entit{\"a}ten zur Identifikation und Disambiguierung heranziehen. Das in der Arbeit vorgestellte Verfahren ist bisherigen Ans{\"a}tzen hinsichtlich der Genauigkeit und Vollst{\"a}ndigkeit bei der Identifikation {\"u}berlegen.},
  language    = {de},
}