% Master's thesis entry, cited by key Matzk2016.
% NOTE(review): `language` was exported as `de`, but the title and abstract are
% English, so it is set to `en` here -- confirm against the repository record.
% NOTE(review): `pages` appears to be a page count (front matter, body) rather
% than a page range; kept as exported.
@mastersthesis{Matzk2016,
  author   = {Matzk, S{\"o}ren},
  title    = {Predictive analysis of metabolic and preventive patient data},
  type     = {Master's Thesis},
  school   = {Universit{\"a}t Potsdam},
  year     = {2016},
  pages    = {XI, 63},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-406103},
  language = {en},
  abstract = {Every day huge amounts of medical records are stored by means of hospitals' and medical offices' software. These data are generally unconsidered in research. In this work anonymized everyday medical records ascertained in a physician's office, covering holistic internal medicine in combination with orthomolecular medicine, are analyzed. Due to the lack of cooperation by the provider of the medical practice software a selection of diagnoses and anthropometric parameters was extracted manually. Information about patients' treatment are not available in this study. Nevertheless, data mining approaches including machine learning techniques are used to enable research, prevention and monitoring of patients' course of treatment. The potential of these everyday medical data is demonstrated by investigating co-morbidity and pyroluria which is a metabolic dysfunction indicated by increased levels of hydroxyhemopyrrolin-2-one (HPL). It points out that the metabolic syndrome forms a cluster of its components and cancer, as well as mental disorders are grouped with thyroid diseases including autoimmune thyroid diseases. In contrast to prevailing assumptions in which it was estimated that approximately 10 \% of the population show increased levels of HPL, in this analysis 84.9 \% of the tested patients have an increased concentration of HPL. Prevention is illustrated by using decision tree models to predict diseases. Evaluation of the obtained model for Hashimoto's disease yield an accuracy of 87.5 \%. The model generated for hypothyroidism (accuracy of 60.9 \%) reveals shortcomings due to missing information about the treatment.
Dynamics in the biomolecular status of 20 patients who have visited the medical office at least one time a year between 2010 and 2014 for laboratory tests are visualized by STATIS, a consensus analysis based on an extension to principal component analysis. Thereby, one can obtain patterns which are predestinated for specific diseases as hypertension. This study demonstrates that these often overlooked everyday data are challenging due to its sparsity and heterogeneity but its analysis is a great possibility to do research on disease profiles of real patients.},
}