% Journal article. The journal name is stored once, in the journal field;
% an extra series field holding the same name has been dropped because
% biblatex prints series on articles (classic styles ignore it).
% Page range uses a bare double hyphen with no surrounding spaces.
@article{WulffBuschhueterWestphaletal.2020,
  author    = {Wulff, Peter and Buschh{\"u}ter, David and Westphal, Andrea and Nowak, Anna and Becker, Lisa and Robalino, Hugo and Stede, Manfred and Borowski, Andreas},
  title     = {Computer-based classification of preservice physics teachers' written reflections},
  journal   = {Journal of Science Education and Technology},
  volume    = {30},
  number    = {1},
  pages     = {1--15},
  year      = {2020},
  publisher = {Springer},
  address   = {Dordrecht},
  issn      = {1059-0145},
  doi       = {10.1007/s10956-020-09865-1},
  abstract  = {Reflecting in written form on one's teaching enactments has been considered a facilitator for teachers' professional growth in university-based preservice teacher education. Writing a structured reflection can be facilitated through external feedback. However, researchers noted that feedback in preservice teacher education often relies on holistic, rather than more content-based, analytic feedback because educators oftentimes lack resources (e.g., time) to provide more analytic feedback. To overcome this impediment to feedback for written reflection, advances in computer technology can be of use. Hence, this study sought to utilize techniques of natural language processing and machine learning to train a computer-based classifier that classifies preservice physics teachers' written reflections on their teaching enactments in a German university teacher education program. To do so, a reflection model was adapted to physics education. It was then tested to what extent the computer-based classifier could accurately classify the elements of the reflection model in segments of preservice physics teachers' written reflections. Multinomial logistic regression using word count as a predictor was found to yield acceptable average human-computer agreement (F1-score on held-out test dataset of 0.56) so that it might fuel further development towards an automated feedback tool that supplements existing holistic feedback for written reflections with data-based, analytic feedback.},
  language  = {en}
}
% Journal article. The journal name is stored once, in the journal field;
% an extra series field holding the same name has been dropped because
% biblatex prints series on articles (classic styles ignore it).
% NOTE(review): pages now holds the article number taken from the DOI
% suffix (02190); the previous value 18 looked like a total page count
% and is preserved in pagetotal -- confirm against the publisher record.
@article{LevyMussackBrunneretal.2020,
  author    = {Levy, Jessica and Mussack, Dominic and Brunner, Martin and Keller, Ulrich and Cardoso-Leite, Pedro and Fischbach, Antoine},
  title     = {Contrasting classical and machine learning approaches in the estimation of value-added scores in large-scale educational data},
  journal   = {Frontiers in Psychology},
  volume    = {11},
  pages     = {2190},
  pagetotal = {18},
  year      = {2020},
  publisher = {Frontiers Research Foundation},
  address   = {Lausanne},
  issn      = {1664-1078},
  doi       = {10.3389/fpsyg.2020.02190},
  abstract  = {There is no consensus on which statistical model estimates school value-added (VA) most accurately. To date, the two most common statistical models used for the calculation of VA scores are two classical methods: linear regression and multilevel models. These models have the advantage of being relatively transparent and thus understandable for most researchers and practitioners. However, these statistical models are bound to certain assumptions (e.g., linearity) that might limit their prediction accuracy. Machine learning methods, which have yielded spectacular results in numerous fields, may be a valuable alternative to these classical models. Although big data is not new in general, it is relatively new in the realm of social sciences and education. New types of data require new data analytical approaches. Such techniques have already evolved in fields with a long tradition in crunching big data (e.g., gene technology). The objective of the present paper is to competently apply these ``imported'' techniques to education data, more precisely VA scores, and assess when and how they can extend or replace the classical psychometrics toolbox. The different models include linear and non-linear methods and extend classical models with the most commonly used machine learning methods (i.e., random forest, neural networks, support vector machines, and boosting). We used representative data of 3,026 students in 153 schools who took part in the standardized achievement tests of the Luxembourg School Monitoring Program in grades 1 and 3. Multilevel models outperformed classical linear and polynomial regressions, as well as different machine learning models. However, it could be observed that across all schools, school VA scores from different model types correlated highly. Yet, the percentage of disagreements as compared to multilevel models was not trivial and real-life implications for individual schools may still be dramatic depending on the model type used. Implications of these results and possible ethical concerns regarding the use of machine learning methods for decision-making in education are discussed.},
  language  = {en}
}