% Theses and proceedings from Universität Potsdam (OPUS repository exports).
% Classic-BibTeX compatible: accents use brace-wrapped special characters ({\"a}).

% NOTE: this is a Bachelor thesis; @mastersthesis is the closest standard type
% (there is no @bachelorthesis), and the `type` field overrides the printed label.
@mastersthesis{Zolotarenko2020,
  type     = {Bachelor Thesis},
  author   = {Zolotarenko, Olha},
  title    = {Visualization approaches for coherence relations},
  school   = {Universit{\"a}t Potsdam},
  year     = {2020},
  doi      = {10.25932/publishup-51699},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-516997},
  abstract = {Die hier vorliegende Arbeit stellt einen Versuch dar, den Visualisierungsans{\"a}tzen in dem Feld der annotierten Diskursrelationen nahezukommen und durch Vergleich verschiedener Programmierwerkzeuge eine anforderungsnahe L{\"o}sung zu finden. Als Gegenstand der Forschung wurden Koh{\"a}renzrelationen ausgew{\"a}hlt, welche eine Reihe an Eigenschaften aufweisen, die f{\"u}r viele Visualisierungsmethoden herausfordernd sein k{\"o}nnen. Die Arbeit stellt f{\"u}nf verschiedene Visualisierungsm{\"o}glichkeiten sowohl von der Anwendungs- als auch von der Entwicklungsperspektive vor. Die zun{\"a}chst getesteten einfachen HTML-Ans{\"a}tze sowie das Softwarepaket displaCy zeigen das unzureichende Niveau f{\"u}r die Visualisierungszwecke dieser Arbeit. Die alternative Implementierung mit D3 w{\"u}rde die Voraussetzungen zwar optimal erf{\"u}llen, sprengt aber deutlich den Rahmen des Projektes. Die gew{\"a}hlte Hauptmethode wurde als Single-Web-Anwendung konzipiert und verwendet das Annotationstool brat, welches die meisten definierten Voraussetzungen f{\"u}r die Repr{\"a}sentation der Koh{\"a}renzrelationen erf{\"u}llt. Die Anwendung stellt die im Text annotierten Koh{\"a}renzrelationen graphisch dar und bietet eine Filterfunktion f{\"u}r verschiedene Relationstypen an.},
  language = {en},
}

@phdthesis{Sidarenka2019,
  author   = {Sidarenka, Uladzimir},
  title    = {Sentiment analysis of {German} {Twitter}},
  school   = {Universit{\"a}t Potsdam},
  pages    = {vii, 217},
  year     = {2019},
  doi      = {10.25932/publishup-43742},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-437422},
  abstract = {The immense popularity of online communication services in the last decade has not only upended our lives (with news spreading like wildfire on the Web, presidents announcing their decisions on Twitter, and the outcome of political elections being determined on Facebook) but also dramatically increased the amount of data exchanged on these platforms. Therefore, if we wish to understand the needs of modern society better and want to protect it from new threats, we urgently need more robust, higher-quality natural language processing (NLP) applications that can recognize such necessities and menaces automatically, by analyzing uncensored texts. Unfortunately, most NLP programs today have been created for standard language, as we know it from newspapers, or, in the best case, adapted to the specifics of English social media. This thesis reduces the existing deficit by entering the new frontier of German online communication and addressing one of its most prolific forms—users' conversations on Twitter. In particular, it explores the ways and means by how people express their opinions on this service, examines current approaches to automatic mining of these feelings, and proposes novel methods, which outperform state-of-the-art techniques. For this purpose, I introduce a new corpus of German tweets that have been manually annotated with sentiments, their targets and holders, as well as lexical polarity items and their contextual modifiers. Using these data, I explore four major areas of sentiment research: (i) generation of sentiment lexicons, (ii) fine-grained opinion mining, (iii) message-level polarity classification, and (iv) discourse-aware sentiment analysis. In the first task, I compare three popular groups of lexicon generation methods: dictionary-, corpus-, and word-embedding-based ones, finding that dictionary-based systems generally yield better polarity lists than the last two groups. Apart from this, I propose a linear projection algorithm, whose results surpass many existing automatically-generated lexicons. Afterwards, in the second task, I examine two common approaches to automatic prediction of sentiment spans, their sources, and targets: conditional random fields (CRFs) and recurrent neural networks, obtaining higher scores with the former model and improving these results even further by redefining the structure of CRF graphs. When dealing with message-level polarity classification, I juxtapose three major sentiment paradigms: lexicon-, machine-learning-, and deep-learning-based systems, and try to unite the first and last of these method groups by introducing a bidirectional neural network with lexicon-based attention. Finally, in order to make the new classifier aware of microblogs' discourse structure, I let it separately analyze the elementary discourse units of each tweet and infer the overall polarity of a message from the scores of its EDUs with the help of two new approaches: latent-marginalized CRFs and Recursive Dirichlet Process.},
  language = {en},
}

@phdthesis{Jurish2011,
  author   = {Jurish, Bryan},
  title    = {Finite-state canonicalization techniques for historical {German}},
  school   = {Universit{\"a}t Potsdam},
  year     = {2011},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-55789},
  abstract = {This work addresses issues in the automatic preprocessing of historical German input text for use by conventional natural language processing techniques. Conventional techniques cannot adequately account for historical input text due to conventional tools' reliance on a fixed application-specific lexicon keyed by contemporary orthographic surface form on the one hand, and the lack of consistent orthographic conventions in historical input text on the other. Historical spelling variation is treated here as an error-correction problem or "canonicalization" task: an attempt to automatically assign each (historical) input word a unique extant canonical cognate, thus allowing direct application-specific processing (tagging, parsing, etc.) of the returned canonical forms without need for any additional application-specific modifications. In the course of the work, various methods for automatic canonicalization are investigated and empirically evaluated, including conflation by phonetic identity, conflation by lemma instantiation heuristics, canonicalization by weighted finite-state rewrite cascade, and token-wise disambiguation by a dynamic Hidden Markov Model.},
  language = {en},
}

% Whole proceedings volume (editors, publisher, ISBN; no author/booktitle),
% hence @proceedings rather than @inproceedings.
@proceedings{OPUS4-2516,
  title     = {Finite-state methods and natural language processing: 6th International Workshop, {FSMNLP} 2007, {Potsdam}, {Germany}, {September} 14--16; revised papers},
  editor    = {Hanneforth, Thomas and W{\"u}rzner, Kay-Michael},
  publisher = {Universit{\"a}tsverlag Potsdam},
  address   = {Potsdam},
  year      = {2008},
  pages     = {225},
  isbn      = {978-3-940793-57-7},
  url       = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-23812},
  abstract  = {Proceedings with the revised papers of the FSMNLP (Finite-state Methods and Natural Language Processing) 2007 Workshop in Potsdam},
  language  = {en},
}