@phdthesis{Stallasch2024, author = {Stallasch, Sophie E.}, title = {Optimizing power analysis for randomized experiments: Design parameters for student achievement}, doi = {10.25932/publishup-62939}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-629396}, school = {Universit{\"a}t Potsdam}, pages = {ix, 224}, year = {2024}, abstract = {Randomized trials (RTs) are promising methodological tools to inform evidence-based reform to enhance schooling. Establishing a robust knowledge base on how to promote student achievement requires sensitive RT designs demonstrating sufficient statistical power and precision to draw conclusive and correct inferences on the effectiveness of educational programs and innovations. Proper power analysis is therefore an integral component of any informative RT on student achievement. This venture critically hinges on the availability of reasonable input variance design parameters (and their inherent uncertainties) that optimally reflect the realities around the prospective RT—precisely, its target population and outcome, possibly applied covariates, the concrete design as well as the planned analysis. However, existing compilations in this vein show far-reaching shortcomings. The overarching endeavor of the present doctoral thesis was to substantively expand available resources devoted to tweak the planning of RTs evaluating educational interventions. At the core of this thesis is a systematic analysis of design parameters for student achievement, generating reliable and versatile compendia and developing thorough guidance to support apt power analysis to design strong RTs. To this end, the thesis at hand bundles two complementary studies which capitalize on rich data of several national probability samples from major German longitudinal large-scale assessments. Study I applied two- and three-level latent (covariate) modeling to analyze design parameters for a wide spectrum of mathematical-scientific, verbal, and domain-general achievement outcomes. Three vital covariate sets were covered comprising (a) pretests, (b) sociodemographic characteristics, and (c) their combination. The accumulated estimates were additionally summarized in terms of normative distributions. Study II specified (manifest) single-, two-, and three-level models and referred to influential psychometric heuristics to analyze design parameters and develop concise selection guidelines for covariate (a) types of varying bandwidth-fidelity (domain-identical, cross-domain, fluid intelligence pretests; sociodemographic characteristics), (b) combinations quantifying incremental validities, and (c) time lags of 1- to 7-year-lagged pretests scrutinizing validity degradation. The estimates for various mathematical-scientific and verbal achievement outcomes were meta-analytically integrated and employed in precision simulations. In doing so, Studies I and II addressed essential gaps identified in previous repertoires in six major dimensions: Taken together, this thesis accumulated novel design parameters and deliberate guidance for RT power analysis (1) tailored to four German student (sub)populations across the entire school career from Grade 1 to 12, (2) matched to 21 achievement (sub)domains, (3) adjusted for 11 covariate sets enriched by empirically supported guidelines, (4) adapted to six RT designs, (5) suitable for latent and manifest analysis models, (6) which were cataloged along with quantifications of their associated uncertainties. These resources are complemented by a plethora of illustrative application examples to gently direct psychological and educational researchers through pivotal steps in the process of RT design. The striking heterogeneity of the design parameter estimates across all these dimensions constitutes the overall, joint key result of Studies I and II. Hence, this work convincingly reinforces calls for a close match between design parameters and the specific peculiarities of the target RT's research context. All in all, the present doctoral thesis offers a—so far unique—nuanced and extensive toolkit to optimize power analysis for sound RTs on student achievement in the German (and similar) school context. It is of utmost importance that research does not tire to spawn robust evidence on what actually works to improve schooling. With this in mind, I hope that the emerging compendia and guidance contribute to the quality and rigor of our randomized experiments in psychology and education.}, language = {en} }