% Journal articles co-authored by Manfred Stede (2012).
% Conventions used below: one field per line, full journal names,
% page ranges written with an unspaced double hyphen, bare DOIs.

% Stede & Peldszus: illocutionary status and causal connectives (Journal of Pragmatics 44(2)).
@article{StedePeldszus2012,
  author    = {Stede, Manfred and Peldszus, Andreas},
  title     = {The role of illocutionary status in the usage conditions of causal connectives and in coherence relations},
  journal   = {Journal of Pragmatics: An Interdisciplinary Journal of Language Studies},
  volume    = {44},
  number    = {2},
  pages     = {214--229},
  year      = {2012},
  publisher = {Elsevier},
  address   = {Amsterdam},
  issn      = {0378-2166},
  doi       = {10.1016/j.pragma.2012.01.004},
  language  = {en},
  abstract  = {The meaning of linguistic connectives has often been characterized in terms of their position in a bipartite (semantic, pragmatic) or a tripartite (content, epistemic, speech act) structure of domains, depending on what kinds of entities are being connected (largely: propositions or speech acts). This paper argues that a more fine-grained analysis can be achieved by directing some more attention to the characterization of the entities being related. We propose an inventory of categories of illocutionary status for labelling the spans that are being connected. On this basis, the distinction between the content and the epistemic domain, in particular, can be made more explicit. Focusing on the group of causal connectives in German, we conducted a corpus annotation study from which we derived distinct pragmatic 'usage profiles' of the most frequent causal connectives. Finally, we offer some suggestions on the role of illocutions in relation-based accounts of discourse structure.},
}

% Stede & Huang: introduction to a special issue on annotation (Language Resources and Evaluation 46(1)).
@article{StedeHuang2012,
  author    = {Stede, Manfred and Huang, Chu-Ren},
  title     = {Inter-operability and reusability: the science of annotation},
  journal   = {Language Resources and Evaluation},
  volume    = {46},
  number    = {1},
  pages     = {91--94},
  year      = {2012},
  publisher = {Springer},
  address   = {Dordrecht},
  issn      = {1574-020X},
  doi       = {10.1007/s10579-011-9164-x},
  language  = {en},
  abstract  = {Annotating linguistic data has become a major field of interest, both for supplying the necessary data for machine learning approaches to NLP applications, and as a research issue in its own right. This comprises issues of technical formats, tools, and methodologies of annotation. We provide a brief overview of these notions and then introduce the papers assembled in this special issue.},
}

% Chiarcos, Ritz & Stede: merging conflicting tokenizations (Language Resources and Evaluation 46(1)).
% "Merging" is brace-protected because it opens a new sentence after the ellipsis.
@article{ChiarcosRitzStede2012,
  author    = {Chiarcos, Christian and Ritz, Julia and Stede, Manfred},
  title     = {By all these lovely tokens... {Merging} conflicting tokenizations},
  journal   = {Language Resources and Evaluation},
  volume    = {46},
  number    = {1},
  pages     = {53--74},
  year      = {2012},
  publisher = {Springer},
  address   = {Dordrecht},
  issn      = {1574-020X},
  doi       = {10.1007/s10579-011-9161-0},
  language  = {en},
  abstract  = {Given the contemporary trend to modular NLP architectures and multiple annotation frameworks, the existence of concurrent tokenizations of the same text represents a pervasive problem in everyday's NLP practice and poses a non-trivial theoretical problem to the integration of linguistic annotations and their interpretability in general. This paper describes a solution for integrating different tokenizations using a standoff XML format, and discusses the consequences from a corpus-linguistic perspective.},
}