% Thesis records (school: Universitaet Potsdam). Text outside @entries is
% ignored by BibTeX, so these lines act as comments.

% Bachelor thesis. The standard entry type is @mastersthesis (not
% "masterthesis"); the `type` field overrides the label the style prints.
@mastersthesis{Zolotarenko2020,
  author   = {Zolotarenko, Olha},
  title    = {Visualization approaches for coherence relations},
  type     = {Bachelor Thesis},
  school   = {Universit{\"a}t Potsdam},
  year     = {2020},
  doi      = {10.25932/publishup-51699},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-516997},
  language = {en},
  abstract = {Die hier vorliegende Arbeit stellt einen Versuch dar, den Visualisierungsans{\"a}tzen in dem Feld der annotierten Diskursrelationen nahezukommen und durch Vergleich verschiedener Programmierwerkzeuge eine anforderungsnahe L{\"o}sung zu finden. Als Gegenstand der Forschung wurden Koh{\"a}renzrelationen ausgew{\"a}hlt, welche eine Reihe an Eigenschaften aufweisen, die f{\"u}r viele Visualisierungsmethoden herausfordernd sein k{\"o}nnen. Die Arbeit stellt f{\"u}nf verschiedene Visualisierungsm{\"o}glichkeiten sowohl von der Anwendungs- als auch von der Entwicklungsperspektive vor. Die zun{\"a}chst getesteten einfachen HTML-Ans{\"a}tze sowie das Softwarepaket displaCy zeigen das unzureichende Niveau f{\"u}r die Visualisierungszwecke dieser Arbeit. Die alternative Implementierung mit D3 w{\"u}rde die Voraussetzungen zwar optimal erf{\"u}llen, sprengt aber deutlich den Rahmen des Projektes. Die gew{\"a}hlte Hauptmethode wurde als Single-Web-Anwendung konzipiert und verwendet das Annotationstool brat, welches die meisten definierten Voraussetzungen f{\"u}r die Repr{\"a}sentation der Koh{\"a}renzrelationen erf{\"u}llt.
Die Anwendung stellt die im Text annotierten Koh{\"a}renzrelationen graphisch dar und bietet eine Filterfunktion f{\"u}r verschiedene Relationstypen an.},
}

% PhD thesis. `pages` holds the front-matter and body page counts as given
% in the record (not a page range).
@phdthesis{Peldszus2017,
  author   = {Peldszus, Andreas},
  title    = {Automatic recognition of argumentation structure in short monological texts},
  school   = {Universit{\"a}t Potsdam},
  year     = {2017},
  pages    = {xiv, 252},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-421441},
  language = {en},
  abstract = {The aim of this thesis is to develop approaches to automatically recognise the structure of argumentation in short monological texts. This amounts to identifying the central claim of the text, supporting premises, possible objections, and counter-objections to these objections, and connecting them correspondingly to a structure that adequately describes the argumentation presented in the text. The first step towards such an automatic analysis of the structure of argumentation is to know how to represent it. We systematically review the literature on theories of discourse, as well as on theories of the structure of argumentation against a set of requirements and desiderata, and identify the theory of J. B. Freeman (1991, 2011) as a suitable candidate to represent argumentation structure. Based on this, a scheme is derived that is able to represent complex argumentative structures and can cope with various segmentation issues typically occurring in authentic text. In order to empirically test our scheme for reliability of annotation, we conduct several annotation experiments, the most important of which assesses the agreement in reconstructing argumentation structure. The results show that expert annotators produce very reliable annotations, while the results of non-expert annotators highly depend on their training in and commitment to the task. We then introduce the 'microtext' corpus, a collection of short argumentative texts.
We report on the creation, translation, and annotation of it and provide a variety of statistics. It is the first parallel corpus (with a German and English version) annotated with argumentation structure, and -- thanks to the work of our colleagues -- also the first annotated according to multiple theories of (global) discourse structure. The corpus is then used to develop and evaluate approaches to automatically predict argumentation structures in a series of six studies: The first two of them focus on learning local models for different aspects of argumentation structure. In the third study, we develop the main approach proposed in this thesis for predicting globally optimal argumentation structures: the 'evidence graph' model. This model is then systematically compared to other approaches in the fourth study, and achieves state-of-the-art results on the microtext corpus. The remaining two studies aim to demonstrate the versatility and elegance of the proposed approach by predicting argumentation structures of different granularity from text, and finally by using it to translate rhetorical structure representations into argumentation structures.},
}

% PhD thesis. "German" in the title is braced so sentence-casing styles keep
% its capital letter.
% NOTE(review): the abstract reads "a well-written text inhibits"; this may be
% a typo for "exhibits" -- confirm against the repository record before editing.
@phdthesis{Bourgonje2021,
  author   = {Bourgonje, Peter},
  title    = {Shallow discourse parsing for {German}},
  school   = {Universit{\"a}t Potsdam},
  year     = {2021},
  pages    = {vii, 140},
  doi      = {10.25932/publishup-50663},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-506632},
  language = {en},
  abstract = {While the last few decades have seen impressive improvements in several areas in Natural Language Processing, asking a computer to make sense of the discourse of utterances in a text remains challenging. There are several different theories that aim to describe and analyse the coherent structure that a well-written text inhibits. These theories have varying degrees of applicability and feasibility for practical use.
Presumably the most data-driven of these theories is the paradigm that comes with the Penn Discourse TreeBank, a corpus annotated for discourse relations containing over 1 million words. Any language other than English however, can be considered a low-resource language when it comes to discourse processing. This dissertation is about shallow discourse parsing (discourse parsing following the paradigm of the Penn Discourse TreeBank) for German. The limited availability of annotated data for German means the potential of modern, deep-learning based methods relying on such data is also limited. This dissertation explores to what extent machine-learning and more recent deep-learning based methods can be combined with traditional, linguistic feature engineering to improve performance for the discourse parsing task. A pivotal role is played by connective lexicons that exhaustively list the discourse connectives of a particular language along with some of their core properties. To facilitate training and evaluation of the methods proposed in this dissertation, an existing corpus (the Potsdam Commentary Corpus) has been extended and additional data has been annotated from scratch. The approach to end-to-end shallow discourse parsing for German adopts a pipeline architecture and either presents the first results or improves over state-of-the-art for German for the individual sub-tasks of the discourse parsing task, which are, in processing order, connective identification, argument extraction and sense classification. The end-to-end shallow discourse parser for German that has been developed for the purpose of this dissertation is open-source and available online. In the course of writing this dissertation, work has been carried out on several connective lexicons in different languages.
Due to their central role and demonstrated usefulness for the methods proposed in this dissertation, strategies are discussed for creating or further developing such lexicons for a particular language, as well as suggestions on how to further increase their usefulness for shallow discourse parsing.},
}