@phdthesis{Reinhardt2020, author = {Reinhardt, Maria}, title = {Hybrid filters and multi-scale models}, doi = {10.25932/publishup-47435}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-474356}, school = {Universit{\"a}t Potsdam}, pages = {xiii, 102}, year = {2020}, abstract = {This thesis is concerned with Data Assimilation, the process of combining model predictions with observations. So called filters are of special interest. One is inter- ested in computing the probability distribution of the state of a physical process in the future, given (possibly) imperfect measurements. This is done using Bayes' rule. The first part focuses on hybrid filters, that bridge between the two main groups of filters: ensemble Kalman filters (EnKF) and particle filters. The first are a group of very stable and computationally cheap algorithms, but they request certain strong assumptions. Particle filters on the other hand are more generally applicable, but computationally expensive and as such not always suitable for high dimensional systems. Therefore it exists a need to combine both groups to benefit from the advantages of each. This can be achieved by splitting the likelihood function, when assimilating a new observation and treating one part of it with an EnKF and the other part with a particle filter. The second part of this thesis deals with the application of Data Assimilation to multi-scale models and the problems that arise from that. One of the main areas of application for Data Assimilation techniques is predicting the development of oceans and the atmosphere. These processes involve several scales and often balance rela- tions between the state variables. The use of Data Assimilation procedures most often violates relations of that kind, which leads to unrealistic and non-physical pre- dictions of the future development of the process eventually. This work discusses the inclusion of a post-processing step after each assimilation step, in which a minimi- sation problem is solved, which penalises the imbalance. This method is tested on four different models, two Hamiltonian systems and two spatially extended models, which adds even more difficulties.}, language = {en} } @phdthesis{Wichitsanguan2016, author = {Wichitsa-nguan, Korakot}, title = {Modifications and extensions of the logistic regression and Cox model}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-90033}, school = {Universit{\"a}t Potsdam}, pages = {x, 131}, year = {2016}, abstract = {In many statistical applications, the aim is to model the relationship between covariates and some outcomes. A choice of the appropriate model depends on the outcome and the research objectives, such as linear models for continuous outcomes, logistic models for binary outcomes and the Cox model for time-to-event data. In epidemiological, medical, biological, societal and economic studies, the logistic regression is widely used to describe the relationship between a response variable as binary outcome and explanatory variables as a set of covariates. However, epidemiologic cohort studies are quite expensive regarding data management since following up a large number of individuals takes long time. Therefore, the case-cohort design is applied to reduce cost and time for data collection. The case-cohort sampling collects a small random sample from the entire cohort, which is called subcohort. The advantage of this design is that the covariate and follow-up data are recorded only on the subcohort and all cases (all members of the cohort who develop the event of interest during the follow-up process). In this thesis, we investigate the estimation in the logistic model for case-cohort design. First, a model with a binary response and a binary covariate is considered. The maximum likelihood estimator (MLE) is described and its asymptotic properties are established. An estimator for the asymptotic variance of the estimator based on the maximum likelihood approach is proposed; this estimator differs slightly from the estimator introduced by Prentice (1986). Simulation results for several proportions of the subcohort show that the proposed estimator gives lower empirical bias and empirical variance than Prentice's estimator. Then the MLE in the logistic regression with discrete covariate under case-cohort design is studied. Here the approach of the binary covariate model is extended. Proving asymptotic normality of estimators, standard errors for the estimators can be derived. The simulation study demonstrates the estimation procedure of the logistic regression model with a one-dimensional discrete covariate. Simulation results for several proportions of the subcohort and different choices of the underlying parameters indicate that the estimator developed here performs reasonably well. Moreover, the comparison between theoretical values and simulation results of the asymptotic variance of estimator is presented. Clearly, the logistic regression is sufficient for the binary outcome refers to be available for all subjects and for a fixed time interval. Nevertheless, in practice, the observations in clinical trials are frequently collected for different time periods and subjects may drop out or relapse from other causes during follow-up. Hence, the logistic regression is not appropriate for incomplete follow-up data; for example, an individual drops out of the study before the end of data collection or an individual has not occurred the event of interest for the duration of the study. These observations are called censored observations. The survival analysis is necessary to solve these problems. Moreover, the time to the occurence of the event of interest is taken into account. The Cox model has been widely used in survival analysis, which can effectively handle the censored data. Cox (1972) proposed the model which is focused on the hazard function. The Cox model is assumed to be λ(t|x) = λ0(t) exp(β^Tx) where λ0(t) is an unspecified baseline hazard at time t and X is the vector of covariates, β is a p-dimensional vector of coefficient. In this thesis, the Cox model is considered under the view point of experimental design. The estimability of the parameter β0 in the Cox model, where β0 denotes the true value of β, and the choice of optimal covariates are investigated. We give new representations of the observed information matrix In(β) and extend results for the Cox model of Andersen and Gill (1982). In this way conditions for the estimability of β0 are formulated. Under some regularity conditions, ∑ is the inverse of the asymptotic variance matrix of the MPLE of β0 in the Cox model and then some properties of the asymptotic variance matrix of the MPLE are highlighted. Based on the results of asymptotic estimability, the calculation of local optimal covariates is considered and shown in examples. In a sensitivity analysis, the efficiency of given covariates is calculated. For neighborhoods of the exponential models, the efficiencies have then been found. It is appeared that for fixed parameters β0, the efficiencies do not change very much for different baseline hazard functions. Some proposals for applicable optimal covariates and a calculation procedure for finding optimal covariates are discussed. Furthermore, the extension of the Cox model where time-dependent coefficient are allowed, is investigated. In this situation, the maximum local partial likelihood estimator for estimating the coefficient function β(·) is described. Based on this estimator, we formulate a new test procedure for testing, whether a one-dimensional coefficient function β(·) has a prespecified parametric form, say β(·; ϑ). The score function derived from the local constant partial likelihood function at d distinct grid points is considered. It is shown that the distribution of the properly standardized quadratic form of this d-dimensional vector under the null hypothesis tends to a Chi-squared distribution. Moreover, the limit statement remains true when replacing the unknown ϑ0 by the MPLE in the hypothetical model and an asymptotic α-test is given by the quantiles or p-values of the limiting Chi-squared distribution. Finally, we propose a bootstrap version of this test. The bootstrap test is only defined for the special case of testing whether the coefficient function is constant. A simulation study illustrates the behavior of the bootstrap test under the null hypothesis and a special alternative. It gives quite good results for the chosen underlying model. References P. K. Andersen and R. D. Gill. Cox's regression model for counting processes: a large samplestudy. Ann. Statist., 10(4):1100{1120, 1982. D. R. Cox. Regression models and life-tables. J. Roy. Statist. Soc. Ser. B, 34:187{220, 1972. R. L. Prentice. A case-cohort design for epidemiologic cohort studies and disease prevention trials. Biometrika, 73(1):1{11, 1986.}, language = {en} } @phdthesis{Solms2017, author = {Solms, Alexander Maximilian}, title = {Integrating nonlinear mixed effects and physiologically-based modeling approaches for the analysis of repeated measurement studies}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-397070}, school = {Universit{\"a}t Potsdam}, pages = {x, 141}, year = {2017}, abstract = {During the drug discovery \& development process, several phases encompassing a number of preclinical and clinical studies have to be successfully passed to demonstrate safety and efficacy of a new drug candidate. As part of these studies, the characterization of the drug's pharmacokinetics (PK) is an important aspect, since the PK is assumed to strongly impact safety and efficacy. To this end, drug concentrations are measured repeatedly over time in a study population. The objectives of such studies are to describe the typical PK time-course and the associated variability between subjects. Furthermore, underlying sources significantly contributing to this variability, e.g. the use of comedication, should be identified. The most commonly used statistical framework to analyse repeated measurement data is the nonlinear mixed effect (NLME) approach. At the same time, ample knowledge about the drug's properties already exists and has been accumulating during the discovery \& development process: Before any drug is tested in humans, detailed knowledge about the PK in different animal species has to be collected. This drug-specific knowledge and general knowledge about the species' physiology is exploited in mechanistic physiological based PK (PBPK) modeling approaches -it is, however, ignored in the classical NLME modeling approach. Mechanistic physiological based models aim to incorporate relevant and known physiological processes which contribute to the overlying process of interest. In comparison to data--driven models they are usually more complex from a mathematical perspective. For example, in many situations, the number of model parameters outrange the number of measurements and thus reliable parameter estimation becomes more complex and partly impossible. As a consequence, the integration of powerful mathematical estimation approaches like the NLME modeling approach -which is widely used in data-driven modeling -and the mechanistic modeling approach is not well established; the observed data is rather used as a confirming instead of a model informing and building input. Another aggravating circumstance of an integrated approach is the inaccessibility to the details of the NLME methodology so that these approaches can be adapted to the specifics and needs of mechanistic modeling. Despite the fact that the NLME modeling approach exists for several decades, details of the mathematical methodology is scattered around a wide range of literature and a comprehensive, rigorous derivation is lacking. Available literature usually only covers selected parts of the mathematical methodology. Sometimes, important steps are not described or are only heuristically motivated, e.g. the iterative algorithm to finally determine the parameter estimates. Thus, in the present thesis the mathematical methodology of NLME modeling is systemically described and complemented to a comprehensive description, comprising the common theme from ideas and motivation to the final parameter estimation. Therein, new insights for the interpretation of different approximation methods used in the context of the NLME modeling approach are given and illustrated; furthermore, similarities and differences between them are outlined. Based on these findings, an expectation-maximization (EM) algorithm to determine estimates of a NLME model is described. Using the EM algorithm and the lumping methodology by Pilari2010, a new approach on how PBPK and NLME modeling can be combined is presented and exemplified for the antibiotic levofloxacin. Therein, the lumping identifies which processes are informed by the available data and the respective model reduction improves the robustness in parameter estimation. Furthermore, it is shown how apriori known factors influencing the variability and apriori known unexplained variability is incorporated to further mechanistically drive the model development. Concludingly, correlation between parameters and between covariates is automatically accounted for due to the mechanistic derivation of the lumping and the covariate relationships. A useful feature of PBPK models compared to classical data-driven PK models is in the possibility to predict drug concentration within all organs and tissue in the body. Thus, the resulting PBPK model for levofloxacin is used to predict drug concentrations and their variability within soft tissues which are the site of action for levofloxacin. These predictions are compared with data of muscle and adipose tissue obtained by microdialysis, which is an invasive technique to measure a proportion of drug in the tissue, allowing to approximate the concentrations in the interstitial fluid of tissues. Because, so far, comparing human in vivo tissue PK and PBPK predictions are not established, a new conceptual framework is derived. The comparison of PBPK model predictions and microdialysis measurements shows an adequate agreement and reveals further strengths of the presented new approach. We demonstrated how mechanistic PBPK models, which are usually developed in the early stage of drug development, can be used as basis for model building in the analysis of later stages, i.e. in clinical studies. As a consequence, the extensively collected and accumulated knowledge about species and drug are utilized and updated with specific volunteer or patient data. The NLME approach combined with mechanistic modeling reveals new insights for the mechanistic model, for example identification and quantification of variability in mechanistic processes. This represents a further contribution to the learn \& confirm paradigm across different stages of drug development. Finally, the applicability of mechanism--driven model development is demonstrated on an example from the field of Quantitative Psycholinguistics to analyse repeated eye movement data. Our approach gives new insight into the interpretation of these experiments and the processes behind.}, language = {en} }