% Journal article: Ward, Vega and Baumann (2012), Speech Communication 54(2), pp. 161--174.
@article{WardVegaBaumann2012,
  author    = {Ward, Nigel G. and Vega, Alejandro and Baumann, Timo},
  title     = {Prosodic and temporal features for language modeling for dialog},
  journal   = {Speech Communication},
  volume    = {54},
  number    = {2},
  pages     = {161--174},
  year      = {2012},
  publisher = {Elsevier},
  address   = {Amsterdam},
  issn      = {0167-6393},
  doi       = {10.1016/j.specom.2011.07.009},
  language  = {en},
  abstract  = {If we can model the cognitive and communicative processes underlying speech, we should be able to better predict what a speaker will do. With this idea as inspiration, we examine a number of prosodic and timing features as potential sources of information on what words the speaker is likely to say next. In spontaneous dialog we find that word probabilities do vary with such features. Using perplexity as the metric, the most informative of these included recent speaking rate, volume, and pitch, and time until end of utterance. Using simple combinations of such features to augment trigram language models gave up to a 8.4\% perplexity benefit on the Switchboard corpus, and up to a 1.0\% relative reduction in word error rate (0.3\% absolute) on the Verbmobil II corpus.},
}