% Doctoral thesis (Potsdam, 2023) on variation in coreference patterns across language modes and text genres.
% Fields are grouped as: bibliographic core, identifiers, then the abstract.
@phdthesis{Aktas2023,
  author    = {Aktas, Berfin},
  title     = {Variation in coreference patterns},
  school    = {Universit{\"a}t Potsdam},
  year      = {2023},
  pages     = {xviii, 195},
  language  = {en},
  doi       = {10.25932/publishup-59608},
  url       = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-596086},
  abstract  = {This thesis explores the variation in coreference patterns across language modes (i.e., spoken and written) and text genres. The significance of research on variation in language use has been emphasized in a number of linguistic studies. For instance, Biber and Conrad [2009] state that "register/genre variation is a fundamental aspect of human language" and "Given the ubiquity of register/genre variation, an understanding of how linguistic features are used in patterned ways across text varieties is of central importance for both the description of particular languages and the development of cross-linguistic theories of language use."[p.23] We examine the variation across genres with the primary goal of contributing to the body of knowledge on the description of language use in English. On the computational side, we believe that incorporating linguistic knowledge into learning-based systems can boost the performance of automatic natural language processing systems, particularly for non-standard texts. Therefore, in addition to their descriptive value, the linguistic findings we provide in this study may prove to be helpful for improving the performance of automatic coreference resolution, which is essential for a good text understanding and beneficial for several downstream NLP applications, including machine translation and text summarization. In particular, we study a genre of texts that is formed of conversational interactions on the well-known social media platform Twitter. Two factors motivate us: First, Twitter conversations are realized in written form but resemble spoken communication [Scheffler, 2017], and therefore they form an atypical genre for the written mode. Second, while Twitter texts are a complicated genre for automatic coreference resolution, due to their widespread use in the digital sphere, at the same time they are highly relevant for applications that seek to extract information or sentiments from users' messages. 
Thus, we are interested in discovering more about the linguistic and computational aspects of coreference in Twitter conversations. We first created a corpus of such conversations for this purpose and annotated it for coreference. We are interested in not only the coreference patterns but the overall discourse behavior of Twitter conversations. To address this, in addition to the coreference relations, we also annotated the coherence relations on the corpus we compiled. The corpus is available online in a newly developed form that allows for separating the tweets from their annotations. This study consists of three empirical analyses where we independently apply corpus-based, psycholinguistic and computational approaches for the investigation of variation in coreference patterns in a complementary manner. (1) We first make a descriptive analysis of variation across genres through a corpus-based study. We investigate the linguistic aspects of nominal coreference in Twitter conversations and we determine how this genre relates to other text genres in spoken and written modes. In addition to the variation across genres, studying the differences in spoken-written modes is also in focus of linguistic research since from Woolbert [1922]. (2) In order to investigate whether the language mode alone has any effect on coreference patterns, we carry out a crowdsourced experiment and analyze the patterns in the same genre for both spoken and written modes. (3) Finally, we explore the potentials of domain adaptation of automatic coreference resolution (ACR) for the conversational Twitter data. In order to answer the question of how the genre of Twitter conversations relates to other genres in spoken and written modes with respect to coreference patterns, we employ a state-of-the-art neural ACR model [Lee et al., 2018] to examine whether ACR on Twitter conversations will benefit from mode-based separation in out-of-domain training data.}
}
% Journal article (2017, vol. 43) on coreference and antecedent representation across languages.
% No series field on purpose: the exported value only repeated the journal name.
@article{LagoSloggettSchlueteretal.2017,
  author    = {Lago, Sol and Sloggett, Shayne and Schl{\"u}ter, Zoe and Chow, Wing Yee and Williams, Alexander and Lau, Ellen and Phillips, Colin},
  title     = {Coreference and Antecedent Representation Across Languages},
  journal   = {Journal of Experimental Psychology: Learning, Memory, and Cognition},
  volume    = {43},
  publisher = {American Psychological Association},
  address   = {Washington},
  issn      = {0278-7393},
  doi       = {10.1037/xlm0000343},
  pages     = {795--817},
  year      = {2017},
  language  = {en}
}
% Postprint, number 568 in a Universitaet Potsdam postprint series (see the series field).
% The series name lives only in the series field; it is not a journal, so no journal field is set.
@misc{LagoNamystJaegeretal.2019,
  author    = {Lago, Sol and Namyst, Anna and J{\"a}ger, Lena Ann and Lau, Ellen},
  title     = {Antecedent access mechanisms in pronoun processing},
  series    = {Postprints der Universit{\"a}t Potsdam Humanwissenschaftliche Reihe},
  number    = {568},
  issn      = {1866-8364},
  doi       = {10.25932/publishup-43323},
  url       = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-433237},
  pages     = {641--661},
  year      = {2019},
  abstract  = {Previous cross-modal priming studies showed that lexical decisions to words after a pronoun were facilitated when these words were semantically related to the pronoun's antecedent. These studies suggested that semantic priming effectively measured antecedent retrieval during coreference. We examined whether these effects extended to implicit reading comprehension using the N400 response. The results of three experiments did not yield strong evidence of semantic facilitation due to coreference. Further, the comparison with two additional experiments showed that N400 facilitation effects were reduced in sentences (vs. word pair paradigms) and were modulated by the case morphology of the prime word. We propose that priming effects in cross-modal experiments may have resulted from task-related strategies. More generally, the impact of sentence context and morphological information on priming effects suggests that they may depend on the extent to which the upcoming input is predicted, rather than automatic spreading activation between semantically related words.},
  language  = {en}
}
% Doctoral thesis (Potsdam, 2013) on theoretical and computational models of discourse-givenness of noun phrases.
@phdthesis{Ritz2013,
  author    = {Ritz, Julia},
  title     = {Discourse-givenness of noun phrases: theoretical and computational models},
  url       = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-70818},
  school    = {Universit{\"a}t Potsdam},
  year      = {2013},
  abstract  = {This thesis gives formal definitions of discourse-givenness, coreference and reference, and reports on experiments with computational models of discourse-givenness of noun phrases for English and German. Definitions are based on Bach's (1987) work on reference, Kibble and van Deemter's (2000) work on coreference, and Kamp and Reyle's Discourse Representation Theory (1993). For the experiments, the following corpora with coreference annotation were used: MUC-7, OntoNotes and ARRAU for English, and TueBa-D/Z for German. As for classification algorithms, they cover J48 decision trees, the rule based learner Ripper, and linear support vector machines. New features are suggested, representing the noun phrase's specificity as well as its context, which lead to a significant improvement of classification quality.},
  language  = {en}
}