% Journal of Vision 19(3), 2019: time course of bottom-up/top-down and
% low-/high-level influences on fixation locations.
% NOTE(review): pages = {23} looks like a page count rather than a page range;
% confirm against the publisher record before relying on it.
@article{SchuettRothkegelTrukenbrodetal.2019,
  author    = {Sch{\"u}tt, Heiko Herbert and Rothkegel, Lars Oliver Martin and Trukenbrod, Hans Arne and Engbert, Ralf and Wichmann, Felix A.},
  title     = {Disentangling bottom-up versus top-down and low-level versus high-level influences on eye movements over time},
  journal   = {Journal of Vision},
  volume    = {19},
  number    = {3},
  publisher = {Association for Research in Vision and Ophthalmology},
  address   = {Rockville},
  issn      = {1534-7362},
  doi       = {10.1167/19.3.1},
  pages     = {23},
  year      = {2019},
  abstract  = {Bottom-up and top-down as well as low-level and high-level factors influence where we fixate when viewing natural scenes. However, the importance of each of these factors and how they interact remains a matter of debate. Here, we disentangle these factors by analyzing their influence over time. For this purpose, we develop a saliency model that is based on the internal representation of a recent early spatial vision model to measure the low-level, bottom-up factor. To measure the influence of high-level, bottom-up features, we use a recent deep neural network-based saliency model. To account for top-down influences, we evaluate the models on two large data sets with different tasks: first, a memorization task and, second, a search task. Our results lend support to a separation of visual scene exploration into three phases: the first saccade, an initial guided exploration characterized by a gradual broadening of the fixation density, and a steady state that is reached after roughly 10 fixations. Saccade-target selection during the initial exploration and in the steady state is related to similar areas of interest, which are better predicted when including high-level features. In the search data set, fixation locations are determined predominantly by top-down processes. In contrast, the first fixation follows a different fixation density and contains a strong central fixation bias. Nonetheless, first fixations are guided strongly by image properties, and as early as 200 ms after image onset, fixations are better predicted by high-level information. We conclude that any low-level, bottom-up factors are mainly limited to the generation of the first saccade. All saccades are better explained when high-level features are considered, and later, this high-level, bottom-up control can be overruled by top-down influences.},
  language  = {en}
}
% Journal of Experimental Psychology: Learning, Memory, and Cognition 45(4), 2019:
% forced fixations vs. trans-saccadic integration in the boundary paradigm.
@article{SchottervonderMalsburgLeinenger2019,
  author    = {Schotter, Elizabeth Roye and von der Malsburg, Titus Raban and Leinenger, Mallorie},
  title     = {Forced Fixations, Trans-Saccadic Integration, and Word Recognition},
  journal   = {Journal of Experimental Psychology: Learning, Memory, and Cognition},
  volume    = {45},
  number    = {4},
  publisher = {American Psychological Association},
  address   = {Washington},
  issn      = {0278-7393},
  doi       = {10.1037/xlm0000617},
  pages     = {677--688},
  year      = {2019},
  abstract  = {Recent studies using the gaze-contingent boundary paradigm reported a reversed preview benefit---shorter fixations on a target word when an unrelated preview was easier to process than the fixated target (Schotter \& Leinenger, 2016). This is explained via forced fixations---short fixations on words that would ideally be skipped (because lexical processing has progressed enough) but could not be because saccade planning reached a point of no return. This contrasts with accounts of preview effects via trans-saccadic integration---shorter fixations on a target word when the preview is more similar to it (see Cutter, Drieghe, \& Liversedge, 2015). In addition, if the previewed word---not the fixated target---determines subsequent eye movements, is it also this word that enters the linguistic processing stream? We tested these accounts by having 24 subjects read 150 sentences in the boundary paradigm in which both the preview and target were initially plausible but later one, both, or neither became implausible, providing an opportunity to probe which one was linguistically encoded. In an intervening buffer region, both words were plausible, providing an opportunity to investigate trans-saccadic integration. The frequency of the previewed word affected progressive saccades (i.e., forced fixations) as well as when transsaccadic integration failure increased regressions, but, only the implausibility of the target word affected semantic encoding. These data support a hybrid account of saccadic control (Reingold, Reichle, Glaholt, \& Sheridan, 2012) driven by incomplete (often parafoveal) word recognition, which occurs prior to complete (often foveal) word recognition.},
  language  = {en}
}
% PhD thesis, Universitaet Potsdam, 2019: biometric identification from eye movements.
% NOTE(review): the author field places four name tokens in the surname part;
% confirm the intended family-name/given-name split with the source record.
@phdthesis{AbdelwahabHusseinAbdelwahabElsayed2019,
  author    = {Abdelwahab Hussein Abdelwahab Elsayed, Ahmed},
  title     = {Probabilistic, deep, and metric learning for biometric identification from eye movements},
  doi       = {10.25932/publishup-46798},
  url       = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-467980},
  school    = {Universit{\"a}t Potsdam},
  pages     = {vi, 65},
  year      = {2019},
  abstract  = {A central insight from psychological studies on human eye movements is that eye movement patterns are highly individually characteristic. They can, therefore, be used as a biometric feature, that is, subjects can be identified based on their eye movements. This thesis introduces new machine learning methods to identify subjects based on their eye movements while viewing arbitrary content. The thesis focuses on probabilistic modeling of the problem, which has yielded the best results in the most recent literature. The thesis studies the problem in three phases by proposing a purely probabilistic, probabilistic deep learning, and probabilistic deep metric learning approach. In the first phase, the thesis studies models that rely on psychological concepts about eye movements. Recent literature illustrates that individual-specific distributions of gaze patterns can be used to accurately identify individuals. In these studies, models were based on a simple parametric family of distributions. Such simple parametric models can be robustly estimated from sparse data, but have limited flexibility to capture the differences between individuals. Therefore, this thesis proposes a semiparametric model of gaze patterns that is flexible yet robust for individual identification. These patterns can be understood as domain knowledge derived from psychological literature. Fixations and saccades are examples of simple gaze patterns. The proposed semiparametric densities are drawn under a Gaussian process prior centered at a simple parametric distribution. Thus, the model will stay close to the parametric class of densities if little data is available, but it can also deviate from this class if enough data is available, increasing the flexibility of the model. The proposed method is evaluated on a large-scale dataset, showing significant improvements over the state-of-the-art.
Later, the thesis replaces the model based on gaze patterns derived from psychological concepts with a deep neural network that can learn more informative and complex patterns from raw eye movement data. As previous work has shown that the distribution of these patterns across a sequence is informative, a novel statistical aggregation layer called the quantile layer is introduced. It explicitly fits the distribution of deep patterns learned directly from the raw eye movement data. The proposed deep learning approach is end-to-end learnable, such that the deep model learns to extract informative, short local patterns while the quantile layer learns to approximate the distributions of these patterns. Quantile layers are a generic approach that can converge to standard pooling layers or have a more detailed description of the features being pooled, depending on the problem. The proposed model is evaluated in a large-scale study using the eye movements of subjects viewing arbitrary visual input. The model improves upon the standard pooling layers and other statistical aggregation layers proposed in the literature. It also improves upon the state-of-the-art eye movement biometrics by a wide margin. Finally, for the model to identify any subject --- not just the set of subjects it is trained on --- a metric learning approach is developed. Metric learning learns a distance function over instances. The metric learning model maps the instances into a metric space, where sequences of the same individual are close, and sequences of different individuals are further apart. This thesis introduces a deep metric learning approach with distributional embeddings. The approach represents sequences as a set of continuous distributions in a metric space; to achieve this, a new loss function based on Wasserstein distances is introduced. The proposed method is evaluated on multiple domains besides eye movement biometrics.
This approach outperforms the state of the art in deep metric learning in several domains while also outperforming the state of the art in eye movement biometrics.},
  language  = {en}
}