% Journal article (Cognitive Science, vol. 40, 2016): listener gaze as a
% real-time index of understanding and as feedback for language generation.
@article{GaroufiStaudteKolleretal.2016,
  author    = {Garoufi, Konstantina and Staudte, Maria and Koller, Alexander and Crocker, Matthew W.},
  title     = {Exploiting Listener Gaze to Improve Situated Communication in Dynamic Virtual Environments},
  journal   = {Cognitive Science},
  volume    = {40},
  pages     = {1671--1703},
  year      = {2016},
  publisher = {Wiley-Blackwell},
  address   = {Hoboken},
  issn      = {0364-0213},
  doi       = {10.1111/cogs.12298},
  abstract  = {Beyond the observation that both speakers and listeners rapidly inspect the visual targets of referring expressions, it has been argued that such gaze may constitute part of the communicative signal. In this study, we investigate whether a speaker may, in principle, exploit listener gaze to improve communicative success. In the context of a virtual environment where listeners follow computer-generated instructions, we provide two kinds of support for this claim. First, we show that listener gaze provides a reliable real-time index of understanding even in dynamic and complex environments, and on a per-utterance basis. Second, we show that a language generation system that uses listener gaze to provide rapid feedback improves overall task performance in comparison with two systems that do not use gaze. Aside from demonstrating the utility of listener gaze in situated communication, our findings open the door to new methods for developing and evaluating multi-modal models of situated interaction.},
  language  = {en}
}
% Journal article (Language, Cognition and Neuroscience 29(8), 2014):
% planning-based model for generating effective referring expressions.
@article{GaroufiKoller2014,
  author    = {Garoufi, Konstantina and Koller, Alexander},
  title     = {Generation of effective referring expressions in situated context},
  journal   = {Language, Cognition and Neuroscience},
  volume    = {29},
  number    = {8},
  pages     = {986--1001},
  year      = {2014},
  publisher = {Routledge, Taylor \& Francis Group},
  address   = {Abingdon},
  issn      = {2327-3798},
  doi       = {10.1080/01690965.2013.847190},
  abstract  = {In task-oriented communication, references often need to be effective in their distinctive function, that is, help the hearer identify the referent correctly and as effortlessly as possible. However, it can be challenging for computational or empirical studies to capture referential effectiveness. Empirical findings indicate that human-produced references are not always optimally effective, and that their effectiveness may depend on different aspects of the situational context that can evolve dynamically over the course of an interaction. On this basis, we propose a computational model of effective reference generation which distinguishes speaker behaviour according to its helpfulness to the hearer in a certain situation, and explicitly aims at modelling highly helpful speaker behaviour rather than speaker behaviour invariably. Our model, which extends the planning-based paradigm of sentence generation with a statistical account of effectiveness, can adapt to the situational context by making this distinction newly for each new reference. We find that the generated references resemble those of effective human speakers more closely than references of baseline models, and that they are resolved correctly more often than those of other models participating in a shared-task evaluation with human hearers. Finally, we argue that the model could serve as a methodological framework for computational and empirical research on referential effectiveness.},
  language  = {en}
}
% PhD thesis (Universitaet Potsdam, 2013): planning-based interactive
% generation of situated discourse, including gaze-based feedback.
@phdthesis{Garoufi2013,
  author    = {Garoufi, Konstantina},
  title     = {Interactive generation of effective discourse in situated context: a planning-based approach},
  school    = {Universit{\"a}t Potsdam},
  year      = {2013},
  url       = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-69108},
  abstract  = {As our modern-built structures are becoming increasingly complex, carrying out basic tasks such as identifying points or objects of interest in our surroundings can consume considerable time and cognitive resources. In this thesis, we present a computational approach to converting contextual information about a person's physical environment into natural language, with the aim of helping this person identify given task-related entities in their environment. Using efficient methods from automated planning -- the field of artificial intelligence concerned with finding courses of action that can achieve a goal --, we generate discourse that interactively guides a hearer through completing their task. Our approach addresses the challenges of controlling, adapting to, and monitoring the situated context. To this end, we develop a natural language generation system that plans how to manipulate the non-linguistic context of a scene in order to make it more favorable for references to task-related objects. This strategy distributes a hearer's cognitive load of interpreting a reference over multiple utterances rather than one long referring expression. Further, to optimize the system's linguistic choices in a given context, we learn how to distinguish speaker behavior according to its helpfulness to hearers in a certain situation, and we model the behavior of human speakers that has been proven helpful. The resulting system combines symbolic with statistical reasoning, and tackles the problem of making non-trivial referential choices in rich context. Finally, we complement our approach with a mechanism for preventing potential misunderstandings after a reference has been generated. Employing remote eye-tracking technology, we monitor the hearer's gaze and find that it provides a reliable index of online referential understanding, even in dynamically changing scenes. We thus present a system that exploits hearer gaze to generate rapid feedback on a per-utterance basis, further enhancing its effectiveness. Though we evaluate our approach in virtual environments, the efficiency of our planning-based model suggests that this work could be a step towards effective conversational human-computer interaction situated in the real world.},
  language  = {en}
}