% Postprint record (series number 850); the series title is also carried in the journal field.
% The abstract keeps its section labels (Background, Methods, Results, Conclusions) inline.
@misc{HischeLarhlimiSchwarzetal.2012,
  author   = {Hische, Manuela and Larhlimi, Abdelhalim and Schwarz, Franziska and Fischer-Rosinsk{\'y}, Antje and Bobbert, Thomas and Assmann, Anke and Catchpole, Gareth S. and Pfeiffer, Andreas F. H. and Willmitzer, Lothar and Selbig, Joachim and Spranger, Joachim},
  title    = {A distinct metabolic signature predicts development of fasting plasma glucose},
  series   = {Postprints der Universit{\"a}t Potsdam : Mathematisch Naturwissenschaftliche Reihe},
  journal  = {Postprints der Universit{\"a}t Potsdam : Mathematisch Naturwissenschaftliche Reihe},
  number   = {850},
  issn     = {1866-8372},
  doi      = {10.25932/publishup-42740},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-427400},
  pages    = {12},
  year     = {2012},
  abstract = {Background High blood glucose and diabetes are amongst the conditions causing the greatest losses in years of healthy life worldwide. Therefore, numerous studies aim to identify reliable risk markers for development of impaired glucose metabolism and type 2 diabetes. However, the molecular basis of impaired glucose metabolism is so far insufficiently understood. The development of so called 'omics' approaches in the recent years promises to identify molecular markers and to further understand the molecular basis of impaired glucose metabolism and type 2 diabetes. Although univariate statistical approaches are often applied, we demonstrate here that the application of multivariate statistical approaches is highly recommended to fully capture the complexity of data gained using high-throughput methods. Methods We took blood plasma samples from 172 subjects who participated in the prospective Metabolic Syndrome Berlin Potsdam follow-up study (MESY-BEPO Follow-up). We analysed these samples using Gas Chromatography coupled with Mass Spectrometry (GC-MS), and measured 286 metabolites. Furthermore, fasting glucose levels were measured using standard methods at baseline, and after an average of six years. We did correlation analysis and built linear regression models as well as Random Forest regression models to identify metabolites that predict the development of fasting glucose in our cohort. Results We found a metabolic pattern consisting of nine metabolites that predicted fasting glucose development with an accuracy of 0.47 in tenfold cross-validation using Random Forest regression. We also showed that adding established risk markers did not improve the model accuracy. However, external validation is eventually desirable. Although not all metabolites belonging to the final pattern are identified yet, the pattern directs attention to amino acid metabolism, energy metabolism and redox homeostasis. Conclusions We demonstrate that metabolites identified using a high-throughput method (GC-MS) perform well in predicting the development of fasting plasma glucose over several years. Notably, not single, but a complex pattern of metabolites propels the prediction and therefore reflects the complexity of the underlying molecular mechanisms. This result could only be captured by application of multivariate statistical approaches. Therefore, we highly recommend the usage of statistical methods that seize the complexity of the information given by high-throughput methods.},
  language = {en},
}

% Journal article; SLocX and Arabidopsis are brace-protected in the title so
% sentence-casing bibliography styles keep their capitalisation.
@article{RyngajlloChildsLohseetal.2011,
  author    = {Ryngajllo, Malgorzata and Childs, Liam H. and Lohse, Marc and Giorgi, Federico M. and Lude, Anja and Selbig, Joachim and Usadel, Bj{\"o}rn},
  title     = {{SLocX} predicting subcellular localization of {Arabidopsis} proteins leveraging gene expression data},
  series    = {Frontiers in plant science},
  volume    = {2},
  journal   = {Frontiers in plant science},
  publisher = {Frontiers Research Foundation},
  address   = {Lausanne},
  issn      = {1664-462X},
  doi       = {10.3389/fpls.2011.00043},
  pages     = {19},
  year      = {2011},
  abstract  = {Despite the growing volume of experimentally validated knowledge about the subcellular localization of plant proteins, a well performing in silico prediction tool is still a necessity. Existing tools, which employ information derived from protein sequence alone, offer limited accuracy and/or rely on full sequence availability. We explored whether gene expression profiling data can be harnessed to enhance prediction performance. To achieve this, we trained several support vector machines to predict the subcellular localization of Arabidopsis thaliana proteins using sequence derived information, expression behavior, or a combination of these data and compared their predictive performance through a cross-validation test. We show that gene expression carries information about the subcellular localization not available in sequence information, yielding dramatic benefits for plastid localization prediction, and some notable improvements for other compartments such as the mitochondrion, the Golgi, and the plasma membrane. Based on these results, we constructed a novel subcellular localization prediction engine, SLocX, combining gene expression profiling data with protein sequence-based information. We then validated the results of this engine using an independent test set of annotated proteins and a transient expression of GFP fusion proteins. Here, we present the prediction framework and a website of predicted localizations for Arabidopsis. The relatively good accuracy of our prediction engine, even in cases where only partial protein sequence is available (e.g., in sequences lacking the N-terminal region), offers a promising opportunity for similar application to non-sequenced or poorly annotated plant species. Although the prediction scope of our method is currently limited by the availability of expression information on the ATH1 array, we believe that the advances in measuring gene expression technology will make our method applicable for all Arabidopsis proteins.},
  language  = {en},
}