% Bibliography on orthography, spelling variation and reading (repository export, cleaned).
%
% Maintenance notes (text outside entries is ignored by BibTeX/Biber):
%  - Citation keys must be unique. The postprint of Pauly and Nottbusch (2020)
%    keeps the key PaulyNottbusch2020, which is the entry that key already
%    resolved to; the journal version is available as PaulyNottbusch2020a.
%  - A second, identical copy of the journal version was removed.
%  - Proper nouns in titles are brace-protected so sentence-casing styles
%    keep their capital letters.
%  - Page ranges use a double hyphen without surrounding spaces.
%  - Non-ASCII characters are avoided; accents use LaTeX escapes.

@phdthesis{Jurish2011,
  author   = {Jurish, Bryan},
  title    = {Finite-state canonicalization techniques for historical {German}},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-55789},
  school   = {Universit{\"a}t Potsdam},
  year     = {2011},
  abstract = {This work addresses issues in the automatic preprocessing of historical German input text for use by conventional natural language processing techniques. Conventional techniques cannot adequately account for historical input text due to conventional tools' reliance on a fixed application-specific lexicon keyed by contemporary orthographic surface form on the one hand, and the lack of consistent orthographic conventions in historical input text on the other. Historical spelling variation is treated here as an error-correction problem or "canonicalization" task: an attempt to automatically assign each (historical) input word a unique extant canonical cognate, thus allowing direct application-specific processing (tagging, parsing, etc.) of the returned canonical forms without need for any additional application-specific modifications. In the course of the work, various methods for automatic canonicalization are investigated and empirically evaluated, including conflation by phonetic identity, conflation by lemma instantiation heuristics, canonicalization by weighted finite-state rewrite cascade, and token-wise disambiguation by a dynamic Hidden Markov Model.},
  language = {en},
}

@phdthesis{Nimz2015,
  author    = {Nimz, Katharina},
  title     = {Sound perception and production in a foreign language},
  publisher = {Universit{\"a}tsverlag Potsdam},
  address   = {Potsdam},
  isbn      = {978-3-86956-361-9},
  issn      = {2190-4545},
  url       = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-88794},
  school    = {Universit{\"a}t Potsdam},
  pages     = {xix, 236},
  year      = {2015},
  abstract  = {The present study addresses the question of how German vowels are perceived and produced by Polish learners of German as a Foreign Language. It comprises three main experiments: a discrimination experiment, a production experiment, and an identification experiment. With the exception of the discrimination task, the experiments further investigated the influence of orthographic marking on the perception and production of German vowel length. It was assumed that explicit markings such as the Dehnungs-h ("lengthening h") could help Polish GFL learners in perceiving and producing German words more correctly. The discrimination experiment with manipulated nonce words showed that Polish GFL learners detect pure length differences in German vowels less accurately than German native speakers, while this was not the case for pure quality differences. The results of the identification experiment contrast with the results of the discrimination task in that Polish GFL learners were better at judging incorrect vowel length than incorrect vowel quality in manipulated real words. However, orthographic marking did not turn out to be the driving factor and it is suggested that metalinguistic awareness can explain the asymmetry between the two perception experiments. The production experiment supported the results of the identification task in that lengthening h did not help Polish learners in producing German vowel length more correctly. Yet, as far as vowel quality productions are concerned, it is argued that orthography does influence L2 sound productions because Polish learners seem to be negatively influenced by their native grapheme-to-phoneme correspondences. It is concluded that it is important to differentiate between the influence of the L1 and L2 orthographic system. On the one hand, the investigation of the influence of orthographic vowel length markers in German suggests that Polish GFL learners do not make use of length information provided by the L2 orthographic system. On the other hand, the vowel quality data suggest that the L1 orthographic system plays a crucial role in the acquisition of a foreign language. It is therefore proposed that orthography influences the acquisition of foreign sounds, but not in the way it was originally assumed.},
  language  = {en},
}

@article{ViebahnMcQueenErnestusetal.2018,
  author    = {Viebahn, Malte Clemens and McQueen, James Milroy and Ernestus, Mirjam Theresia Constantia and Frauenfelder, Ulrich Hans and B{\"u}rki-Foschini, Audrey Damaris},
  title     = {How much does orthography influence the processing of reduced word forms?},
  journal   = {The quarterly journal of experimental psychology},
  volume    = {71},
  number    = {11},
  publisher = {Routledge, Taylor \& Francis Group},
  address   = {Abingdon},
  issn      = {1747-0218},
  doi       = {10.1177/1747021817741859},
  pages     = {2378--2394},
  year      = {2018},
  abstract  = {This study examines the influence of orthography on the processing of reduced word forms. For this purpose, we compared the impact of phonological variation with the impact of spelling-sound consistency on the processing of words that may be produced with or without the vowel schwa. Participants learnt novel French words in which the vowel schwa was present or absent in the first syllable. In Experiment 1, the words were consistently produced without schwa or produced in a variable manner (i.e., sometimes produced with and sometimes produced without schwa). In Experiment 2, words were always produced in a consistent manner, but an orthographic exposure phase was included in which words that were produced without schwa were either spelled with or without the letter < e >. Results from naming and eye-tracking tasks suggest that both phonological variation and spelling-sound consistency influence the processing of spoken novel words. However, the influence of phonological variation outweighs the effect of spelling-sound consistency. Our findings therefore suggest that the influence of orthography on the processing of reduced word forms is relatively small.},
  language  = {en},
}

@article{Schroeder2020,
  author    = {Schroeder, Christoph},
  title     = {The advanced acquisition of orthography in heritage {Turkish} in {Germany}},
  journal   = {Written language \& literacy},
  volume    = {23},
  number    = {2},
  publisher = {John Benjamins Publishing Co.},
  address   = {Amsterdam},
  issn      = {1387-6732},
  doi       = {10.1075/wll.00043.sch},
  pages     = {251--271},
  year      = {2020},
  abstract  = {The paper investigates Turkish texts from heritage speakers of Turkish in Germany in a pseudo-longitudinal setting, looking at pupils' texts from the 5th, 7th, 10th and 12th grades. Two types of dynamics are identified in the advanced acquisition of Turkish orthography in the heritage context. One is the dynamic of language contact, where in certain areas of the orthography, we find a re-interpretation of Turkish principles according to the German model. However, this changes as the pupils grow up. The second dynamic is the heritage situation. The heritage situation on one side leads to the establishment of new practices, and it also leads to a higher degree of variability of spelling solutions in those areas, where the orthographic system of Turkish poses challenges to every writer, whether monolingual and growing up in Turkey or heritage speaker.},
  language  = {en},
}

% Postprint (repository) version; the journal version follows as PaulyNottbusch2020a.
@misc{PaulyNottbusch2020,
  author   = {Pauly, Dennis Nikolas and Nottbusch, Guido},
  title    = {The Influence of the {German} Capitalization Rules on Reading},
  series   = {Postprints der Universit{\"a}t Potsdam : Humanwissenschaftliche Reihe},
  journal  = {Postprints der Universit{\"a}t Potsdam : Humanwissenschaftliche Reihe},
  number   = {622},
  issn     = {1866-8364},
  doi      = {10.25932/publishup-46085},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-460857},
  pages    = {17},
  year     = {2020},
  abstract = {German orthography systematically marks all nouns (even other nominalized word classes) by capitalizing their first letter. It is often claimed that readers benefit from the uppercase-letter syntactic and semantic information, which makes the processing of sentences easier (e.g., Bock et al., 1985, 1989). In order to test this hypothesis, we asked 54 German readers to read single sentences systematically manipulated by a target word (N). In the experimental condition (EXP), we used semantic priming (in the following example: sick -> cold) in order to build up a strong expectation of a noun, which was actually an attribute for the following noun (N+1) (translated to English e.g., "The sick writer had a cold (N) nose (N+1) ..."). The sentences in the control condition were built analogously, but word N was purposefully altered (keeping word length and frequency constant) to make its interpretation as a noun extremely unlikely (e.g., "The sick writer had a blue (N) nose (N+1) ..."). In both conditions, the sentences were presented either following German standard orthography (Cap) or in lowercase spelling (NoCap). The capitalized nouns in the EXP/Cap condition should then prevent garden-path parsing, as capital letters can be recognized parafoveally. However, in the EXP/NoCap condition, we expected a garden-path effect on word N+1 affecting first-pass fixations and the number of regressions, as the reader realizes that word N is instead an adjective. As the control condition does not include a garden-path, we expected to find (small) effects of the violation of the orthographic rule in the CON/NoCap condition, but no garden-path effect. As a global result, it can be stated that reading sentences in which nouns are not marked by a majuscule slows a native German reader down significantly, but from an absolute point of view, the effect is small. Compared with other manipulations (e.g., transpositions or substitutions), a lowercase letter still represents the correct allograph in the correct position without affecting phonology. Furthermore, most German readers do have experience with other alphabetic writing systems that lack consistent noun capitalization, and in (private) digital communication lowercase nouns are quite common. Although our garden-path sentences did not show the desired effect, we found an indication of grammatical pre-processing enabled by the majuscule in the regularly spelled sentences: In the case of high noun frequency, we post hoc located parafovea-on-fovea effects, i.e., longer fixation durations, on the attributive adjective (word N). These benefits of capitalization could only be detected under specific circumstances. In other cases, we conclude that longer reading durations are mainly the result of disturbance in readers' habituation when the expected capitalization is missing.},
  language = {en},
}

% Journal version of the postprint above; previously shared its key and was
% therefore shadowed by it.
@article{PaulyNottbusch2020a,
  author    = {Pauly, Dennis Nikolas and Nottbusch, Guido},
  title     = {The Influence of the {German} Capitalization Rules on Reading},
  journal   = {Frontiers in Communication},
  volume    = {5},
  publisher = {Frontiers Media},
  address   = {Lausanne},
  issn      = {2297-900X},
  doi       = {10.3389/fcomm.2020.00015},
  pages     = {15},
  year      = {2020},
  abstract  = {German orthography systematically marks all nouns (even other nominalized word classes) by capitalizing their first letter. It is often claimed that readers benefit from the uppercase-letter syntactic and semantic information, which makes the processing of sentences easier (e.g., Bock et al., 1985, 1989). In order to test this hypothesis, we asked 54 German readers to read single sentences systematically manipulated by a target word (N). In the experimental condition (EXP), we used semantic priming (in the following example: sick -> cold) in order to build up a strong expectation of a noun, which was actually an attribute for the following noun (N+1) (translated to English e.g., "The sick writer had a cold (N) nose (N+1) ..."). The sentences in the control condition were built analogously, but word N was purposefully altered (keeping word length and frequency constant) to make its interpretation as a noun extremely unlikely (e.g., "The sick writer had a blue (N) nose (N+1) ..."). In both conditions, the sentences were presented either following German standard orthography (Cap) or in lowercase spelling (NoCap). The capitalized nouns in the EXP/Cap condition should then prevent garden-path parsing, as capital letters can be recognized parafoveally. However, in the EXP/NoCap condition, we expected a garden-path effect on word N+1 affecting first-pass fixations and the number of regressions, as the reader realizes that word N is instead an adjective. As the control condition does not include a garden-path, we expected to find (small) effects of the violation of the orthographic rule in the CON/NoCap condition, but no garden-path effect. As a global result, it can be stated that reading sentences in which nouns are not marked by a majuscule slows a native German reader down significantly, but from an absolute point of view, the effect is small. Compared with other manipulations (e.g., transpositions or substitutions), a lowercase letter still represents the correct allograph in the correct position without affecting phonology. Furthermore, most German readers do have experience with other alphabetic writing systems that lack consistent noun capitalization, and in (private) digital communication lowercase nouns are quite common. Although our garden-path sentences did not show the desired effect, we found an indication of grammatical pre-processing enabled by the majuscule in the regularly spelled sentences: In the case of high noun frequency, we post hoc located parafovea-on-fovea effects, i.e., longer fixation durations, on the attributive adjective (word N). These benefits of capitalization could only be detected under specific circumstances. In other cases, we conclude that longer reading durations are mainly the result of disturbance in readers' habituation when the expected capitalization is missing.},
  language  = {en},
}