@article{EngelmannVasishthEngbertetal.2013,
  author    = {Engelmann, Felix and Vasishth, Shravan and Engbert, Ralf and Kliegl, Reinhold},
  title     = {A framework for modeling the interaction of syntactic processing and eye movement control},
  journal   = {Topics in Cognitive Science},
  volume    = {5},
  number    = {3},
  pages     = {452--474},
  year      = {2013},
  publisher = {Wiley-Blackwell},
  address   = {Hoboken},
  issn      = {1756-8757},
  doi       = {10.1111/tops.12026},
  abstract  = {We explore the interaction between oculomotor control and language comprehension on the sentence level using two well-tested computational accounts of parsing difficulty. Previous work (Boston, Hale, Vasishth, \& Kliegl, 2011) has shown that surprisal (Hale, 2001; Levy, 2008) and cue-based memory retrieval (Lewis \& Vasishth, 2005) are significant and complementary predictors of reading time in an eyetracking corpus. It remains an open question how the sentence processor interacts with oculomotor control. Using a simple linking hypothesis proposed in Reichle, Warren, and McConnell (2009), we integrated both measures with the eye movement model EMMA (Salvucci, 2001) inside the cognitive architecture ACT-R (Anderson et al., 2004). We built a reading model that could initiate short Time Out regressions (Mitchell, Shen, Green, \& Hodgson, 2008) that compensate for slow postlexical processing. This simple interaction enabled the model to predict the re-reading of words based on parsing difficulty. The model was evaluated in different configurations on the prediction of frequency effects on the Potsdam Sentence Corpus. The extension of EMMA with postlexical processing improved its predictions and reproduced re-reading rates and durations with a reasonable fit to the data. This demonstration, based on simple and independently motivated assumptions, serves as a foundational step toward a precise investigation of the interaction between high-level language processing and eye movement control.},
  language  = {en}
}
@article{VerissimoClahsen2014,
  author    = {Ver{\'\i}ssimo, Jo{\~a}o Marques and Clahsen, Harald},
  title     = {Variables and similarity in linguistic generalization: Evidence from inflectional classes in {Portuguese}},
  journal   = {Journal of Memory and Language},
  volume    = {76},
  pages     = {61--79},
  year      = {2014},
  publisher = {Elsevier},
  address   = {San Diego},
  issn      = {0749-596X},
  doi       = {10.1016/j.jml.2014.06.001},
  abstract  = {Two opposing viewpoints have been advanced to account for morphological productivity, one according to which some knowledge is couched in the form of operations over variables, and another in which morphological generalization is primarily determined by similarity. We investigated this controversy by examining the generalization of Portuguese verb stems, which fall into one of three conjugation classes. In Study 1, an elicited production task revealed that the generalization of 2nd and 3rd conjugation stems is influenced by the degree of phonological similarity between novel roots and existing verbs, whereas the 1st conjugation generalizes beyond similarity. In Study 2, we directly contrasted two distinct computational implementations of conjugation class assignment in how well they matched the human data: a similarity-driven model that captures phonological similarities, and a dual-mechanism model that implements an explicit distinction between context-free and similarity-based generalizations. The similarity-driven model consistently underestimated 1st conjugation responses and overestimated proportions of 2nd and 3rd conjugation responses, especially for novel verbs that are highly similar to existing verbs of those classes. In contrast, the expected proportions produced by the dual-mechanism model were statistically indistinguishable from human responses. We conclude that both context-free and context-sensitive processes determine the generalization of conjugations in Portuguese, and that similarity-based algorithms of morphological acquisition are insufficient to exhibit default-like generalization. (C) 2014 Elsevier Inc. All rights reserved.},
  language  = {en}
}
@article{TrukenbrodEngbert2014,
  author    = {Trukenbrod, Hans Arne and Engbert, Ralf},
  title     = {{ICAT}: a computational model for the adaptive control of fixation durations},
  journal   = {Psychonomic Bulletin \& Review},
  volume    = {21},
  number    = {4},
  pages     = {907--934},
  year      = {2014},
  publisher = {Springer},
  address   = {New York},
  issn      = {1069-9384},
  doi       = {10.3758/s13423-013-0575-0},
  abstract  = {Eye movements depend on cognitive processes related to visual information processing. Much has been learned about the spatial selection of fixation locations, while the principles governing the temporal control (fixation durations) are less clear. Here, we review current theories for the control of fixation durations in tasks like visual search, scanning, scene perception, and reading and propose a new model for the control of fixation durations. We distinguish two local principles from one global principle of control. First, an autonomous saccade timer initiates saccades after random time intervals (local-I). Second, foveal inhibition permits immediate prolongation of fixation durations by ongoing processing (local-II). Third, saccade timing is adaptive, so that the mean timer value depends on task requirements and fixation history (Global). We demonstrate by numerical simulations that our model qualitatively reproduces patterns of mean fixation durations and fixation duration distributions observed in typical experiments. When combined with assumptions of saccade target selection and oculomotor control, the model accounts for both temporal and spatial aspects of eye movement control in two versions of a visual search task. We conclude that the model provides a promising framework for the control of fixation durations in saccadic tasks.},
  language  = {en}
}
@article{PatilHanneBurchertetal.2016,
  author    = {Patil, Umesh and Hanne, Sandra and Burchert, Frank and De Bleser, Ria and Vasishth, Shravan},
  title     = {A Computational Evaluation of Sentence Processing Deficits in Aphasia},
  journal   = {Cognitive Science},
  volume    = {40},
  pages     = {5--50},
  year      = {2016},
  publisher = {Wiley-Blackwell},
  address   = {Hoboken},
  issn      = {0364-0213},
  doi       = {10.1111/cogs.12250},
  abstract  = {Individuals with agrammatic Broca's aphasia experience difficulty when processing reversible non-canonical sentences. Different accounts have been proposed to explain this phenomenon. The Trace Deletion account (Grodzinsky, 1995, 2000, 2006) attributes this deficit to an impairment in syntactic representations, whereas others (e.g., Caplan, Waters, Dede, Michaud, \& Reddy, 2007; Haarmann, Just, \& Carpenter, 1997) propose that the underlying structural representations are unimpaired, but sentence comprehension is affected by processing deficits, such as slow lexical activation, reduction in memory resources, slowed processing and/or intermittent deficiency, among others. We test the claims of two processing accounts, slowed processing and intermittent deficiency, and two versions of the Trace Deletion Hypothesis (TDH), in a computational framework for sentence processing (Lewis \& Vasishth, 2005) implemented in ACT-R (Anderson, Byrne, Douglass, Lebiere, \& Qin, 2004). The assumption of slowed processing is operationalized as slow procedural memory, so that each processing action is performed slower than normal, and intermittent deficiency as extra noise in the procedural memory, so that the parsing steps are more noisy than normal. We operationalize the TDH as an absence of trace information in the parse tree. To test the predictions of the models implementing these theories, we use the data from a German sentence-picture matching study reported in Hanne, Sekerina, Vasishth, Burchert, and De Bleser (2011). The data consist of offline (sentence-picture matching accuracies and response times) and online (eye fixation proportions) measures. From among the models considered, the model assuming that both slowed processing and intermittent deficiency are present emerges as the best model of sentence processing difficulty in aphasia. The modeling of individual differences suggests that, if we assume that patients have both slowed processing and intermittent deficiency, they have them in differing degrees.},
  language  = {en}
}
@article{RoonGafos2016,
  author    = {Roon, Kevin D. and Gafos, Adamantios I.},
  title     = {Perceiving while producing: Modeling the dynamics of phonological planning},
  journal   = {Journal of Memory and Language},
  volume    = {89},
  pages     = {222--243},
  year      = {2016},
  publisher = {Elsevier},
  address   = {San Diego},
  issn      = {0749-596X},
  doi       = {10.1016/j.jml.2016.01.005},
  abstract  = {We offer a dynamical model of phonological planning that provides a formal instantiation of how the speech production and perception systems interact during online processing. The model is developed on the basis of evidence from an experimental task that requires concurrent use of both systems, the so-called response-distractor task in which speakers hear distractor syllables while they are preparing to produce required responses. The model formalizes how ongoing response planning is affected by perception and accounts for a range of results reported across previous studies. It does so by explicitly addressing the setting of parameter values in representations. The key unit of the model is that of the dynamic field, a distribution of activation over the range of values associated with each representational parameter. The setting of parameter values takes place by the attainment of a stable distribution of activation over the entire field, stable in the sense that it persists even after the response cue in the above experiments has been removed. This and other properties of representations that have been taken as axiomatic in previous work are derived by the dynamics of the proposed model. (C) 2016 Elsevier Inc. All rights reserved.},
  language  = {en}
}
@article{MaetzigVasishthEngelmannetal.2018,
  author    = {M{\"a}tzig, Paul and Vasishth, Shravan and Engelmann, Felix and Caplan, David and Burchert, Frank},
  title     = {A computational investigation of sources of variability in sentence comprehension difficulty in aphasia},
  journal   = {Topics in Cognitive Science},
  volume    = {10},
  number    = {1},
  pages     = {161--174},
  year      = {2018},
  publisher = {Wiley},
  address   = {Hoboken},
  issn      = {1756-8757},
  doi       = {10.1111/tops.12323},
  abstract  = {We present a computational evaluation of three hypotheses about sources of deficit in sentence comprehension in aphasia: slowed processing, intermittent deficiency, and resource reduction. The ACT-R based Lewis and Vasishth (2005) model is used to implement these three proposals. Slowed processing is implemented as slowed execution time of parse steps; intermittent deficiency as increased random noise in activation of elements in memory; and resource reduction as reduced spreading activation. As data, we considered subject vs. object relative sentences, presented in a self-paced listening modality to 56 individuals with aphasia (IWA) and 46 matched controls. The participants heard the sentences and carried out a picture verification task to decide on an interpretation of the sentence. These response accuracies are used to identify the best parameters (for each participant) that correspond to the three hypotheses mentioned above. We show that controls have more tightly clustered (less variable) parameter values than IWA; specifically, compared to controls, among IWA there are more individuals with slow parsing times, high noise, and low spreading activation. We find that (a) individual IWA show differential amounts of deficit along the three dimensions of slowed processing, intermittent deficiency, and resource reduction, (b) overall, there is evidence for all three sources of deficit playing a role, and (c) IWA have a more variable range of parameter values than controls. An important implication is that it may be meaningless to talk about sources of deficit with respect to an abstract average IWA; the focus should be on the individual's differential degrees of deficit along different dimensions, and on understanding the causes of variability in deficit between participants.},
  language  = {en}
}
@comment{NOTE(review) LongdeMeloHeetal.2020: year 2020 agrees with the DOI; volume 44, number 4 presumably belongs to a later print volume (2022?) -- confirm which date the bibliography should show before changing the year field.}
@article{LongdeMeloHeetal.2020,
  author    = {Long, Xiang and de Melo, Gerard and He, Dongliang and Li, Fu and Chi, Zhizhen and Wen, Shilei and Gan, Chuang},
  title     = {Purely attention based local feature integration for video classification},
  journal   = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  volume    = {44},
  number    = {4},
  pages     = {2140--2154},
  year      = {2020},
  publisher = {Institute of Electrical and Electronics Engineers},
  address   = {Los Alamitos},
  issn      = {0162-8828},
  doi       = {10.1109/TPAMI.2020.3029554},
  abstract  = {Recently, substantial research effort has focused on how to apply CNNs or RNNs to better capture temporal patterns in videos, so as to improve the accuracy of video classification. In this paper, we investigate the potential of a purely attention based local feature integration. Accounting for the characteristics of such features in video classification, we first propose Basic Attention Clusters (BAC), which concatenates the output of multiple attention units applied in parallel, and introduce a shifting operation to capture more diverse signals. Experiments show that BAC can achieve excellent results on multiple datasets. However, BAC treats all feature channels as an indivisible whole, which is suboptimal for achieving a finer-grained local feature integration over the channel dimension. Additionally, it treats the entire local feature sequence as an unordered set, thus ignoring the sequential relationships. To improve over BAC, we further propose the channel pyramid attention schema by splitting features into sub-features at multiple scales for coarse-to-fine sub-feature interaction modeling, and propose the temporal pyramid attention schema by dividing the feature sequences into ordered sub-sequences of multiple lengths to account for the sequential order. Our final model pyramid{$\times$}pyramid attention clusters (PPAC) combines both channel pyramid attention and temporal pyramid attention to focus on the most important sub-features, while also preserving the temporal information of the video. We demonstrate the effectiveness of PPAC on seven real-world video classification datasets. Our model achieves competitive results across all of these, showing that our proposed framework can consistently outperform the existing local feature integration methods across a range of different scenarios.},
  language  = {en}
}
@comment{NOTE(review) PaapeAvetisyanLagoetal.2021: pages holds the article number (e-locator) matching the DOI suffix; the exported value 30 was presumably a page count, which styles would print as a page reference -- confirm against the publisher record.}
@article{PaapeAvetisyanLagoetal.2021,
  author    = {Paape, Dario and Avetisyan, Serine and Lago, Sol and Vasishth, Shravan},
  title     = {Modeling misretrieval and feature substitution in agreement attraction},
  journal   = {Cognitive Science},
  volume    = {45},
  number    = {8},
  pages     = {e13019},
  year      = {2021},
  publisher = {Wiley-Blackwell},
  address   = {Malden, Mass.},
  issn      = {0364-0213},
  doi       = {10.1111/cogs.13019},
  abstract  = {We present computational modeling results based on a self-paced reading study investigating number attraction effects in Eastern Armenian. We implement three novel computational models of agreement attraction in a Bayesian framework and compare their predictive fit to the data using k-fold cross-validation. We find that our data are better accounted for by an encoding-based model of agreement attraction, compared to a retrieval-based model. A novel methodological contribution of our study is the use of comprehension questions with open-ended responses, so that both misinterpretation of the number feature of the subject phrase and misassignment of the thematic subject role of the verb can be investigated at the same time. We find evidence for both types of misinterpretation in our study, sometimes in the same trial. However, the specific error patterns in our data are not fully consistent with any previously proposed model.},
  language  = {en}
}