@comment{Cleaned library export: removed `series` fields duplicating `journal`;
  normalized page ranges to `--`; braced acronyms in titles; restored journal-name
  capitalization (styles do not recase `journal`). NOTE(review): in
  WulffMientusNowaketal.2022 the exported `number = {33}` is the IJAIED bound
  volume, moved to `volume`; in MientusHumeWulffetal.2022 the exported
  `edition = {6}` is issue 6 of volume 12, moved to `number` — both confirmed
  against the DOIs. Citation keys left unchanged so existing \cite commands
  still resolve.}

@article{WulffBuschhueterWestphaletal.2022,
  author    = {Wulff, Peter and Buschh{\"u}ter, David and Westphal, Andrea and Mientus, Lukas and Nowak, Anna and Borowski, Andreas},
  title     = {Bridging the gap between qualitative and quantitative assessment in science education research with machine learning},
  journal   = {Journal of Science Education and Technology},
  volume    = {31},
  number    = {4},
  pages     = {490--513},
  year      = {2022},
  publisher = {Springer},
  address   = {Dordrecht},
  issn      = {1059-0145},
  doi       = {10.1007/s10956-022-09969-w},
  abstract  = {Science education researchers typically face a trade-off between more quantitatively oriented confirmatory testing of hypotheses, or more qualitatively oriented exploration of novel hypotheses. More recently, open-ended, constructed response items were used to combine both approaches and advance assessment of complex science-related skills and competencies. For example, research in assessing science teachers' noticing and attention to classroom events benefitted from more open-ended response formats because teachers can present their own accounts. Then, open-ended responses are typically analyzed with some form of content analysis. However, language is noisy, ambiguous, and unsegmented and thus open-ended, constructed responses are complex to analyze. Uncovering patterns in these responses would benefit from more principled and systematic analysis tools. Consequently, computer-based methods with the help of machine learning and natural language processing were argued to be promising means to enhance assessment of noticing skills with constructed response formats. In particular, pretrained language models recently advanced the study of linguistic phenomena and thus could well advance assessment of complex constructs through constructed response items. This study examines potentials and challenges of a pretrained language model-based clustering approach to assess preservice physics teachers' attention to classroom events as elicited through open-ended written descriptions. It was examined to what extent the clustering approach could identify meaningful patterns in the constructed responses, and in what ways textual organization of the responses could be analyzed with the clusters. Preservice physics teachers (N = 75) were instructed to describe a standardized, video-recorded teaching situation in physics. The clustering approach was used to group related sentences. Results indicate that the pretrained language model-based clustering approach yields well-interpretable, specific, and robust clusters, which could be mapped to physics-specific and more general contents. Furthermore, the clusters facilitate advanced analysis of the textual organization of the constructed responses. Hence, we argue that machine learning and natural language processing provide science education researchers means to combine exploratory capabilities of qualitative research methods with the systematicity of quantitative methods.},
  language  = {en},
}

@article{WulffMientusNowaketal.2022,
  author    = {Wulff, Peter and Mientus, Lukas and Nowak, Anna and Borowski, Andreas},
  title     = {Utilizing a pretrained language model ({BERT}) to classify preservice physics teachers' written reflections},
  journal   = {International Journal of Artificial Intelligence in Education},
  volume    = {33},
  pages     = {439--466},
  year      = {2022},
  publisher = {Springer},
  address   = {New York},
  issn      = {1560-4292},
  doi       = {10.1007/s40593-022-00290-6},
  abstract  = {Computer-based analysis of preservice teachers' written reflections could enable educational scholars to design personalized and scalable intervention measures to support reflective writing. Algorithms and technologies in the domain of research related to artificial intelligence have been found to be useful in many tasks related to reflective writing analytics such as classification of text segments. However, mostly shallow learning algorithms have been employed so far. This study explores to what extent deep learning approaches can improve classification performance for segments of written reflections. To do so, a pretrained language model (BERT) was utilized to classify segments of preservice physics teachers' written reflections according to elements in a reflection-supporting model. Since BERT has been found to advance performance in many tasks, it was hypothesized to enhance classification performance for written reflections as well. We also compared the performance of BERT with other deep learning architectures and examined conditions for best performance. We found that BERT outperformed the other deep learning architectures and previously reported performances with shallow learning algorithms for classification of segments of reflective writing. BERT starts to outperform the other models when trained on about 20 to 30\% of the training data. Furthermore, attribution analyses for inputs yielded insights into important features for BERT's classification decisions. Our study indicates that pretrained language models such as BERT can boost performance for language-related tasks in educational contexts such as classification.},
  language  = {en},
}

@article{MientusHumeWulffetal.2022,
  author    = {Mientus, Lukas and Hume, Anne and Wulff, Peter and Meiners, Antoinette and Borowski, Andreas},
  title     = {Modelling {STEM} teachers' pedagogical content knowledge in the framework of the refined consensus model},
  journal   = {Education Sciences},
  volume    = {12},
  number    = {6},
  pages     = {1--25},
  year      = {2022},
  publisher = {MDPI},
  address   = {Basel},
  issn      = {2227-7102},
  doi       = {10.3390/educsci12060385},
  abstract  = {Science education researchers have developed a refined understanding of the structure of science teachers' pedagogical content knowledge (PCK), but how to develop applicable and situation-adequate PCK remains largely unclear. A potential problem lies in the diverse conceptualisations of the PCK used in PCK research. This study sought to systematize existing science education research on PCK through the lens of the recently proposed refined consensus model (RCM) of PCK. In this review, the studies' approaches to investigating PCK and selected findings were characterised and synthesised as an overview comparing research before and after the publication of the RCM. We found that the studies largely employed a qualitative case-study methodology that included specific PCK models and tools. However, in recent years, the studies focused increasingly on quantitative aspects. Furthermore, results of the reviewed studies can mostly be integrated into the RCM. We argue that the RCM can function as a meaningful theoretical lens for conceptualizing links between teaching practice and PCK development by proposing pedagogical reasoning as a mechanism and/or explanation for PCK development in the context of teaching practice.},
  language  = {en},
}

@article{WulffBuschhueterWestphaletal.2020,
  author    = {Wulff, Peter and Buschh{\"u}ter, David and Westphal, Andrea and Nowak, Anna and Becker, Lisa and Robalino, Hugo and Stede, Manfred and Borowski, Andreas},
  title     = {Computer-based classification of preservice physics teachers' written reflections},
  journal   = {Journal of Science Education and Technology},
  volume    = {30},
  number    = {1},
  pages     = {1--15},
  year      = {2020},
  publisher = {Springer},
  address   = {Dordrecht},
  issn      = {1059-0145},
  doi       = {10.1007/s10956-020-09865-1},
  abstract  = {Reflecting in written form on one's teaching enactments has been considered a facilitator for teachers' professional growth in university-based preservice teacher education. Writing a structured reflection can be facilitated through external feedback. However, researchers noted that feedback in preservice teacher education often relies on holistic, rather than more content-based, analytic feedback because educators oftentimes lack resources (e.g., time) to provide more analytic feedback. To overcome this impediment to feedback for written reflection, advances in computer technology can be of use. Hence, this study sought to utilize techniques of natural language processing and machine learning to train a computer-based classifier that classifies preservice physics teachers' written reflections on their teaching enactments in a German university teacher education program. To do so, a reflection model was adapted to physics education. It was then tested to what extent the computer-based classifier could accurately classify the elements of the reflection model in segments of preservice physics teachers' written reflections. Multinomial logistic regression using word count as a predictor was found to yield acceptable average human-computer agreement (F1-score on held-out test dataset of 0.56) so that it might fuel further development towards an automated feedback tool that supplements existing holistic feedback for written reflections with data-based, analytic feedback.},
  language  = {en},
}