% Bibliography records for six 2010 publications (all list Selbig, Joachim as co-author).
% Text outside entries, such as these percent-prefixed lines, is ignored by BibTeX.
% Layout: one field per line, values brace-delimited, abstracts kept verbatim.

% Journal article. NOTE(review): the journal name was derived from the ISSN and the
% DOI prefix; confirm against the publisher record. Volume and pages are not recorded.
@article{HischeLuisDominguezPfeifferetal.2010,
  author   = {Hische, Manuela and Luis-Dominguez, Olga and Pfeiffer, Andreas F. H. and Schwarz, Peter E. and Selbig, Joachim and Spranger, Joachim},
  title    = {Decision trees as a simple-to-use and reliable tool to identify individuals with impaired glucose metabolism or type 2 diabetes mellitus},
  journal  = {European Journal of Endocrinology},
  issn     = {0804-4643},
  doi      = {10.1530/Eje-10-0649},
  year     = {2010},
  abstract = {Objective: The prevalence of unknown impaired fasting glucose (IFG), impaired glucose tolerance (IGT), or type 2 diabetes mellitus (T2DM) is high. Numerous studies demonstrated that IFG, IGT, or T2DM are associated with increased cardiovascular risk, therefore an improved identification strategy would be desirable. The objective of this study was to create a simple and reliable tool to identify individuals with impaired glucose metabolism (IGM). Design and methods: A cohort of 1737 individuals (1055 controls, 682 with previously unknown IGM) was screened by 75 g oral glucose tolerance test (OGTT). Supervised machine learning was used to automatically generate decision trees to identify individuals with IGM. To evaluate the accuracy of identification, a tenfold cross-validation was performed. Resulting trees were subsequently re-evaluated in a second, independent cohort of 1998 individuals (1253 controls, 745 unknown IGM). Results: A clinical decision tree included age and systolic blood pressure (sensitivity 89.3\%, specificity 37.4\%, and positive predictive value (PPV) 48.0\%), while a tree based on clinical and laboratory data included fasting glucose and systolic blood pressure (sensitivity 89.7\%, specificity 54.6\%, and PPV 56.2\%). The inclusion of additional parameters did not improve test quality. The external validation approach confirmed the presented decision trees. Conclusion: We proposed a simple tool to identify individuals with existing IGM. From a practical perspective, fasting blood glucose and blood pressure measurements should be regularly measured in all individuals presenting in outpatient clinics. An OGTT appears to be useful only if the subjects are older than 48 years or show abnormalities in fasting glucose or blood pressure.},
  language = {en}
}

% Record in the Potsdam postprint series (series number 854).
% NOTE(review): pages holds a single number, presumably a page count rather than a range; verify.
@misc{RepsilberKernTelaaretal.2010,
  author   = {Repsilber, Dirk and Kern, Sabine and Telaar, Anna and Walzl, Gerhard and Black, Gillian F. and Selbig, Joachim and Parida, Shreemanta K. and Kaufmann, Stefan H. E. and Jacobsen, Marc},
  title    = {Biomarker discovery in heterogeneous tissue samples},
  series   = {Postprints der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe},
  journal  = {Postprints der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe},
  number   = {854},
  issn     = {1866-8372},
  doi      = {10.25932/publishup-42934},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-429343},
  pages    = {17},
  year     = {2010},
  abstract = {Background: For heterogeneous tissues, such as blood, measurements of gene expression are confounded by relative proportions of cell types involved. Conclusions have to rely on estimation of gene expression signals for homogeneous cell populations, e.g. by applying micro-dissection, fluorescence activated cell sorting, or in-silico deconfounding. We studied feasibility and validity of a non-negative matrix decomposition algorithm using experimental gene expression data for blood and sorted cells from the same donor samples. Our objective was to optimize the algorithm regarding detection of differentially expressed genes and to enable its use for classification in the difficult scenario of reversely regulated genes. This would be of importance for the identification of candidate biomarkers in heterogeneous tissues. Results: Experimental data and simulation studies involving noise parameters estimated from these data revealed that for valid detection of differential gene expression, quantile normalization and use of non-log data are optimal. We demonstrate the feasibility of predicting proportions of constituting cell types from gene expression data of single samples, as a prerequisite for a deconfounding-based classification approach. Classification cross-validation errors with and without using deconfounding results are reported as well as sample-size dependencies. Implementation of the algorithm, simulation and analysis scripts are available. Conclusions: The deconfounding algorithm without decorrelation using quantile normalization on non-log data is proposed for biomarkers that are difficult to detect, and for cases where confounding by varying proportions of cell types is the suspected reason. In this case, a deconfounding ranking approach can be used as a powerful alternative to, or complement of, other statistical learning approaches to define candidate biomarkers for molecular diagnosis and prediction in biomedicine, in realistically noisy conditions and with moderate sample sizes.},
  language = {en}
}

% Journal article (BMC Genomics, volume 11). The accent in the first author's surname is
% written as a BibTeX special character, a brace group starting with the accent macro.
% Braces in the title keep DNA and the genus name capitalised under sentence-casing styles.
@article{RianoPachonKleessenNeigenfindetal.2010,
  author    = {Ria{\~n}o-Pach{\'o}n, Diego Mauricio and Kleessen, Sabrina and Neigenfind, Jost and Durek, Pawel and Weber, Elke and Engelsberger, Wolfgang R. and Walther, Dirk and Selbig, Joachim and Schulze, Waltraud X. and Kersten, Birgit},
  title     = {Proteome-wide survey of phosphorylation patterns affected by nuclear {DNA} polymorphisms in {Arabidopsis} thaliana},
  series    = {BMC Genomics},
  volume    = {11},
  journal   = {BMC Genomics},
  publisher = {Biomed Central},
  address   = {London},
  issn      = {1471-2164},
  doi       = {10.1186/1471-2164-11-411},
  pages     = {19},
  year      = {2010},
  abstract  = {Background: Protein phosphorylation is an important post-translational modification influencing many aspects of dynamic cellular behavior. Site-specific phosphorylation of amino acid residues serine, threonine, and tyrosine can have profound effects on protein structure, activity, stability, and interaction with other biomolecules. Phosphorylation sites can be affected in diverse ways in members of any species, one such way is through single nucleotide polymorphisms (SNPs). The availability of large numbers of experimentally identified phosphorylation sites, and of natural variation datasets in Arabidopsis thaliana prompted us to analyze the effect of non-synonymous SNPs (nsSNPs) onto phosphorylation sites. Results: From the analyses of 7,178 experimentally identified phosphorylation sites we found that: (i) Proteins with multiple phosphorylation sites occur more often than expected by chance. (ii) Phosphorylation hotspots show a preference to be located outside conserved domains. (iii) nsSNPs affected experimental phosphorylation sites as much as the corresponding non-phosphorylated amino acid residues. (iv) Losses of experimental phosphorylation sites by nsSNPs were identified in 86 A. thaliana proteins, among them receptor proteins were overrepresented. These results were confirmed by similar analyses of predicted phosphorylation sites in A. thaliana. In addition, predicted threonine phosphorylation sites showed a significant enrichment of nsSNPs towards asparagines and a significant depletion of the synonymous substitution. Proteins in which predicted phosphorylation sites were affected by nsSNPs (loss and gain), were determined to be mainly receptor proteins, stress response proteins and proteins involved in nucleotide and protein binding. Proteins involved in metabolism, catalytic activity and biosynthesis were less affected. Conclusions: We analyzed more than 7,100 experimentally identified phosphorylation sites in almost 4,300 protein-coding loci in silico, thus constituting the largest phosphoproteomics dataset for A. thaliana available to date. Our findings suggest a relatively high variability in the presence or absence of phosphorylation sites between different natural accessions in receptor and other proteins involved in signal transduction. Elucidating the effect of phosphorylation sites affected by nsSNPs on adaptive responses represents an exciting research goal for the future.},
  language  = {en}
}

% Record of the same title and authors in the Potsdam secondary-publication series
% (series number 1328). Its key carries an "a" suffix so that it is distinct from the
% journal article above; two entries sharing one key make BibTeX report a repeated entry.
@misc{RianoPachonKleessenNeigenfindetal.2010a,
  author   = {Ria{\~n}o-Pach{\'o}n, Diego Mauricio and Kleessen, Sabrina and Neigenfind, Jost and Durek, Pawel and Weber, Elke and Engelsberger, Wolfgang R. and Walther, Dirk and Selbig, Joachim and Schulze, Waltraud X. and Kersten, Birgit},
  title    = {Proteome-wide survey of phosphorylation patterns affected by nuclear {DNA} polymorphisms in {Arabidopsis} thaliana},
  series   = {Zweitver{\"o}ffentlichungen der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe},
  journal  = {Zweitver{\"o}ffentlichungen der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe},
  number   = {1328},
  issn     = {1866-8372},
  doi      = {10.25932/publishup-43118},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-431181},
  pages    = {19},
  year     = {2010},
  abstract = {Background: Protein phosphorylation is an important post-translational modification influencing many aspects of dynamic cellular behavior. Site-specific phosphorylation of amino acid residues serine, threonine, and tyrosine can have profound effects on protein structure, activity, stability, and interaction with other biomolecules. Phosphorylation sites can be affected in diverse ways in members of any species, one such way is through single nucleotide polymorphisms (SNPs). The availability of large numbers of experimentally identified phosphorylation sites, and of natural variation datasets in Arabidopsis thaliana prompted us to analyze the effect of non-synonymous SNPs (nsSNPs) onto phosphorylation sites. Results: From the analyses of 7,178 experimentally identified phosphorylation sites we found that: (i) Proteins with multiple phosphorylation sites occur more often than expected by chance. (ii) Phosphorylation hotspots show a preference to be located outside conserved domains. (iii) nsSNPs affected experimental phosphorylation sites as much as the corresponding non-phosphorylated amino acid residues. (iv) Losses of experimental phosphorylation sites by nsSNPs were identified in 86 A. thaliana proteins, among them receptor proteins were overrepresented. These results were confirmed by similar analyses of predicted phosphorylation sites in A. thaliana. In addition, predicted threonine phosphorylation sites showed a significant enrichment of nsSNPs towards asparagines and a significant depletion of the synonymous substitution. Proteins in which predicted phosphorylation sites were affected by nsSNPs (loss and gain), were determined to be mainly receptor proteins, stress response proteins and proteins involved in nucleotide and protein binding. Proteins involved in metabolism, catalytic activity and biosynthesis were less affected. Conclusions: We analyzed more than 7,100 experimentally identified phosphorylation sites in almost 4,300 protein-coding loci in silico, thus constituting the largest phosphoproteomics dataset for A. thaliana available to date. Our findings suggest a relatively high variability in the presence or absence of phosphorylation sites between different natural accessions in receptor and other proteins involved in signal transduction. Elucidating the effect of phosphorylation sites affected by nsSNPs on adaptive responses represents an exciting research goal for the future.},
  language = {en}
}

% Journal article. The last author is entered in "von Last, First" form so that the
% particle "van" is parsed as part of the surname rather than as a given name.
% NOTE(review): the journal name was derived from the ISSN and the DOI; confirm against
% the publisher record. Volume and pages are not recorded.
@article{SteinfathStrehmelPetersetal.2010,
  author   = {Steinfath, Matthias and Strehmel, Nadine and Peters, Rolf and Schauer, Nicolas and Groth, Detlef and Hummel, Jan and Steup, Martin and Selbig, Joachim and Kopka, Joachim and Geigenberger, Peter and van Dongen, Joost T.},
  title    = {Discovering plant metabolic biomarkers for phenotype prediction using an untargeted approach},
  journal  = {Plant Biotechnology Journal},
  issn     = {1467-7644},
  doi      = {10.1111/j.1467-7652.2010.00516.x},
  year     = {2010},
  abstract = {Biomarkers are used to predict phenotypical properties before these features become apparent and, therefore, are valuable tools for both fundamental and applied research. Diagnostic biomarkers have been discovered in medicine many decades ago and are now commonly applied. While this is routine in the field of medicine, it is of surprise that in agriculture this approach has never been investigated. Up to now, the prediction of phenotypes in plants was based on growing plants and assaying the organs of interest in a time intensive process. For the first time, we demonstrate in this study the application of metabolomics to predict agronomic important phenotypes of a crop plant that was grown in different environments. Our procedure consists of established techniques to screen untargeted for a large amount of metabolites in parallel, in combination with machine learning methods. By using this combination of metabolomics and biomathematical tools metabolites were identified that can be used as biomarkers to improve the prediction of traits. The predictive metabolites can be selected and used subsequently to develop fast, targeted and low-cost diagnostic biomarker assays that can be implemented in breeding programs or quality assessment analysis. The identified metabolic biomarkers allow for the prediction of crop product quality. Furthermore, marker-assisted selection can benefit from the discovery of metabolic biomarkers when other molecular markers come to its limitation. The described marker selection method was developed for potato tubers, but is generally applicable to any crop and trait as it functions independently of genomic information.},
  language = {en}
}

% Journal article without a DOI in this record. The abbreviation in the title is braced
% so that sentence-casing styles keep its capital letter.
% NOTE(review): the journal name was derived from the ISSN alone; confirm against the
% publisher record. Volume and pages are not recorded.
@article{TimmerTheissJuerchottetal.2010,
  author   = {Timmer, Marco and Theiss, Hans and J{\"u}rchott, Katrin and Ries, Christian and Paron, Igor and Franz, W. and Selbig, Joachim and Guo, Ketai and Tonn, J{\"o}rg and Schichor, Christian},
  title    = {Stromal-Derived Factor 1a ({Sdf-1a}), a Homing Factor for Mesenchymal Progenitor Cells, Is Elevated in Tumor Tissue and Plasma of Glioma Patients},
  journal  = {Neuro-Oncology},
  issn     = {1522-8517},
  year     = {2010},
  abstract = {Malignant gliomas are a fatal disease lacking sufficient possibilities for early diagnosis and chemical markers to detect remission or relapse. The recruitment of progenitor cells such as mesenchymal stem cells (MSC) is a main feature of gliomas. Stromal cell-derived factor-1 (SDF-1), a chemokine produced in glioma cell lines, enhances migration in MSC and has been associated with cell survival and apoptosis in gliomas. Therefore, this study was performed to evaluate (i) whether SDF-1 and its receptors are expressed in human malignant gliomas in situ and (ii) if SDF-1 might potentially play a role in recruiting MSCs into human glioma. In glioblastoma tissue, immunohistochemistry revealed that SDF-1 and its receptor CXCR4 are expressed in regions of angiogenesis and necrosis, and qPCR showed that SDF-1 is elevated. Public expression data indicated that CXCR4 was upregulated. The latter data also illustrate that SDF-1 could be up- or downregulated in glioma compared to normal brain in a transcript-specific manner. In plasma, SDF-1 is elevated in glioma patients. The level is reduced by both dexamethasone intake and surgery. Dexamethasone also decreased SDF-1 production in cells in vitro. The undirected migration of human MSC (hMSC) was not enhanced by the addition of SDF-1. However, SDF-1 stimulated directed invasion of hMSC in a dose-dependent manner. Taken together, we show that SDF-1 is a potent chemoattractant of progenitor cells such as hMSCs and that its expression is elevated in glioma tissue, which results in elevated SDF-1 levels in the patient's plasma samples with concomittant decrease after tumor resection. The fact that elevated SDF-1 plasma levels are significantly decreased after tumor surgery could be a first hint that SDF-1 might act as tumor marker for malignant gliomas in order to detect disease progression or remission, respectively.},
  language = {en}
}