% Bibliography database: four entries, one field per line.
% Text outside entries is ignored by BibTeX, so these lines act as comments.
%
% Page counts of whole books are stored in "pagetotal" (plain numbers, no
% language-specific abbreviation); "pages" is reserved for page ranges.

@book{StedeChiarcosGrabskietal.2005,
  author    = {Stede, Manfred and Chiarcos, Christian and Grabski, Michael and Lagerwerf, Luuk},
  title     = {Salience in discourse: multidisciplinary approaches to discourse 2005},
  series    = {Uitgaven Stichting Neerlandistiek VU},
  volume    = {49},
  publisher = {Nodus-Publ; Stichting Neerlandistiek VU},
  address   = {M{\"u}nster; Amsterdam},
  year      = {2005},
  pagetotal = {153},
  isbn      = {3-89323-749-6},
  language  = {en}
}

% NOTE(review): the next entry has no "journal" field, which article entries
% require. Presumably the ISSN identifies the journal -- look it up and add
% journal, volume and pages; TODO confirm.
@article{ChiarcosDipperGoetzeetal.2008,
  author   = {Chiarcos, Christian and Dipper, Stefanie and G{\"o}tze, Michael and Leser, Ulf and L{\"u}deling, Anke and Ritz, Julia and Stede, Manfred},
  title    = {A flexible framework for integrating annotations from different tools and tag sets},
  year     = {2008},
  issn     = {1248-9433},
  language = {en},
  abstract = {We present a general framework for integrating annotations from different tools and tag sets. When annotating corpora at multiple linguistic levels, annotators may use different expert tools for different phenomena or types of annotation. These tools employ different data models and accompanying approaches to visualization, and they produce different output formats. For the purposes of uniformly processing these outputs, we developed a pivot format called PAULA, along with converters to and from tool formats. Different annotations are not only integrated at the level of data format, but are also joined on the level of conceptual representation. For this purpose, we introduce OLiA, an ontology of linguistic annotations that mediates between alternative tag sets that cover the same class of linguistic phenomena. All components are integrated in the linguistic information system ANNIS : Annotation tool output is converted to the pivot format PAULA and read into a database where the data can be visualized, queried, and evaluated across multiple layers. For cross-tag set querying and statistical evaluation, ANNIS uses the ontology of linguistic annotations.
Finally, ANNIS is also tied to a machine learning component for semiautomatic annotation.}
}

% "Merging" is braced so sentence-casing styles keep the capital after the ellipsis.
@article{ChiarcosRitzStede2012,
  author    = {Chiarcos, Christian and Ritz, Julia and Stede, Manfred},
  title     = {By all these lovely tokens... {Merging} conflicting tokenizations},
  journal   = {Language resources and evaluation},
  volume    = {46},
  number    = {1},
  publisher = {Springer},
  address   = {Dordrecht},
  year      = {2012},
  pages     = {53--74},
  issn      = {1574-020X},
  doi       = {10.1007/s10579-011-9161-0},
  language  = {en},
  abstract  = {Given the contemporary trend to modular NLP architectures and multiple annotation frameworks, the existence of concurrent tokenizations of the same text represents a pervasive problem in everyday's NLP practice and poses a non-trivial theoretical problem to the integration of linguistic annotations and their interpretability in general. This paper describes a solution for integrating different tokenizations using a standoff XML format, and discusses the consequences from a corpus-linguistic perspective.}
}

% NOTE(review): this entry carries both "author" and "editor"; classic BibTeX
% styles warn about that combination for book entries. Both are kept so no
% data is lost -- decide which role applies; TODO confirm.
% The German noun in the title is braced so sentence-casing styles keep its capital.
@book{StedeMamprinPeldszusetal.2015,
  author    = {Stede, Manfred and Mamprin, Sara and Peldszus, Andreas and Herzog, Andr{\'e} and Kaupat, David and Chiarcos, Christian and Warzecha, Saskia},
  title     = {Handbuch {Textannotation}},
  editor    = {Stede, Manfred},
  publisher = {Universit{\"a}t Potsdam},
  year      = {2015},
  pagetotal = {xvii, 209},
  isbn      = {978-3-86956-343-5},
  url       = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-82761},
  language  = {de},
  abstract  = {Das Potsdamer Kommentarkorpus ist eine Sammlung von Zeitungstexten, die dem Genre 'Kommentar' zuzuordnen sind. Der {\"o}ffentlich verf{\"u}gbare Teil besteht aus 175 Texten aus der M{\"a}rkischen Allgemeinen Zeitung, die hinsichtlich Syntax, Koreferenz, Konnektoren und Rhetorische Struktur manuell annotiert wurden. Weitere Ebenen werden bei zuk{\"u}nftigen Korpusversionen hinzukommen.
Dieses Buch enth{\"a}lt die Annotationsrichtlinien, die der Bearbeitung des {\"o}ffentlichen Teils des Korpus zugrunde lagen, sowie auch anderer Teile, bei denen mit weiteren Annotationsebenen experimentiert wurde. Die meisten der Richtlinien werden auch f{\"u}r {\"a}hnliche Text-Genres und f{\"u}r andere Sprachen verwendbar sein.}
}