% PhD thesis by Bryan Jurish (Potsdam, 2011).
% The proper noun in the title is brace-protected so that sentence-casing
% bibliography styles do not lowercase it.
@phdthesis{Jurish2011,
  author   = {Jurish, Bryan},
  title    = {Finite-state canonicalization techniques for historical {German}},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-55789},
  school   = {Universit{\"a}t Potsdam},
  year     = {2011},
  abstract = {This work addresses issues in the automatic preprocessing of historical German input text for use by conventional natural language processing techniques. Conventional techniques cannot adequately account for historical input text due to conventional tools' reliance on a fixed application-specific lexicon keyed by contemporary orthographic surface form on the one hand, and the lack of consistent orthographic conventions in historical input text on the other. Historical spelling variation is treated here as an error-correction problem or ``canonicalization'' task: an attempt to automatically assign each (historical) input word a unique extant canonical cognate, thus allowing direct application-specific processing (tagging, parsing, etc.) of the returned canonical forms without need for any additional application-specific modifications. In the course of the work, various methods for automatic canonicalization are investigated and empirically evaluated, including conflation by phonetic identity, conflation by lemma instantiation heuristics, canonicalization by weighted finite-state rewrite cascade, and token-wise disambiguation by a dynamic Hidden Markov Model.},
  language = {en}
}

% PhD thesis by Katharina Nimz (Potsdam, 2015), also carrying publisher,
% ISBN and ISSN data for the published edition.
% NOTE(review): the pages value looks like a page total (front matter plus
% body) rather than a page range -- confirm before mapping it to another field.
@phdthesis{Nimz2015,
  author    = {Nimz, Katharina},
  title     = {Sound perception and production in a foreign language},
  publisher = {Universit{\"a}tsverlag Potsdam},
  address   = {Potsdam},
  isbn      = {978-3-86956-361-9},
  issn      = {2190-4545},
  url       = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-88794},
  school    = {Universit{\"a}t Potsdam},
  pages     = {xix, 236},
  year      = {2015},
  abstract  = {The present study addresses the question of how German vowels are perceived and produced by Polish learners of German as a Foreign Language. It comprises three main experiments: a discrimination experiment, a production experiment, and an identification experiment. With the exception of the discrimination task, the experiments further investigated the influence of orthographic marking on the perception and production of German vowel length. It was assumed that explicit markings such as the Dehnungs-h (``lengthening h'') could help Polish GFL learners in perceiving and producing German words more correctly. The discrimination experiment with manipulated nonce words showed that Polish GFL learners detect pure length differences in German vowels less accurately than German native speakers, while this was not the case for pure quality differences. The results of the identification experiment contrast with the results of the discrimination task in that Polish GFL learners were better at judging incorrect vowel length than incorrect vowel quality in manipulated real words. However, orthographic marking did not turn out to be the driving factor and it is suggested that metalinguistic awareness can explain the asymmetry between the two perception experiments. The production experiment supported the results of the identification task in that lengthening h did not help Polish learners in producing German vowel length more correctly. Yet, as far as vowel quality productions are concerned, it is argued that orthography does influence L2 sound productions because Polish learners seem to be negatively influenced by their native grapheme-to-phoneme correspondences. It is concluded that it is important to differentiate between the influence of the L1 and L2 orthographic system. On the one hand, the investigation of the influence of orthographic vowel length markers in German suggests that Polish GFL learners do not make use of length information provided by the L2 orthographic system. On the other hand, the vowel quality data suggest that the L1 orthographic system plays a crucial role in the acquisition of a foreign language. It is therefore proposed that orthography influences the acquisition of foreign sounds, but not in the way it was originally assumed.},
  language  = {en}
}