% Chiarcos, Ritz and Stede (2012): journal article, Language Resources and Evaluation 46(1).
% The capital in "Merging" is brace-protected because it follows "..." (not a colon),
% so sentence-casing styles would otherwise lowercase it.
@article{ChiarcosRitzStede2012,
  author    = {Chiarcos, Christian and Ritz, Julia and Stede, Manfred},
  title     = {By all these lovely tokens... {Merging} conflicting tokenizations},
  journal   = {Language Resources and Evaluation},
  volume    = {46},
  number    = {1},
  pages     = {53--74},
  year      = {2012},
  publisher = {Springer},
  address   = {Dordrecht},
  issn      = {1574-020X},
  doi       = {10.1007/s10579-011-9161-0},
  language  = {en},
  abstract  = {Given the contemporary trend to modular NLP architectures and multiple annotation frameworks, the existence of concurrent tokenizations of the same text represents a pervasive problem in everyday's NLP practice and poses a non-trivial theoretical problem to the integration of linguistic annotations and their interpretability in general. This paper describes a solution for integrating different tokenizations using a standoff XML format, and discusses the consequences from a corpus-linguistic perspective.},
}