@misc{RianoPachonKleessenNeigenfindetal.2010, author = {Ria{\~n}o-Pach{\´o}n, Diego Mauricio and Kleessen, Sabrina and Neigenfind, Jost and Durek, Pawel and Weber, Elke and Engelsberger, Wolfgang R. and Walther, Dirk and Selbig, Joachim and Schulze, Waltraud X. and Kersten, Birgit}, title = {Proteome-wide survey of phosphorylation patterns affected by nuclear DNA polymorphisms in Arabidopsis thaliana}, series = {Zweitver{\"o}ffentlichungen der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe}, journal = {Zweitver{\"o}ffentlichungen der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe}, number = {1328}, issn = {1866-8372}, doi = {10.25932/publishup-43118}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-431181}, pages = {19}, year = {2010}, abstract = {Background: Protein phosphorylation is an important post-translational modification influencing many aspects of dynamic cellular behavior. Site-specific phosphorylation of amino acid residues serine, threonine, and tyrosine can have profound effects on protein structure, activity, stability, and interaction with other biomolecules. Phosphorylation sites can be affected in diverse ways in members of any species, one such way is through single nucleotide polymorphisms (SNPs). The availability of large numbers of experimentally identified phosphorylation sites, and of natural variation datasets in Arabidopsis thaliana prompted us to analyze the effect of non-synonymous SNPs (nsSNPs) onto phosphorylation sites. Results: From the analyses of 7,178 experimentally identified phosphorylation sites we found that: (i) Proteins with multiple phosphorylation sites occur more often than expected by chance. (ii) Phosphorylation hotspots show a preference to be located outside conserved domains. (iii) nsSNPs affected experimental phosphorylation sites as much as the corresponding non-phosphorylated amino acid residues. (iv) Losses of experimental phosphorylation sites by nsSNPs were identified in 86 A. thaliana proteins, among them receptor proteins were overrepresented. These results were confirmed by similar analyses of predicted phosphorylation sites in A. thaliana. In addition, predicted threonine phosphorylation sites showed a significant enrichment of nsSNPs towards asparagines and a significant depletion of the synonymous substitution. Proteins in which predicted phosphorylation sites were affected by nsSNPs (loss and gain), were determined to be mainly receptor proteins, stress response proteins and proteins involved in nucleotide and protein binding. Proteins involved in metabolism, catalytic activity and biosynthesis were less affected. Conclusions: We analyzed more than 7,100 experimentally identified phosphorylation sites in almost 4,300 protein-coding loci in silico, thus constituting the largest phosphoproteomics dataset for A. thaliana available to date. Our findings suggest a relatively high variability in the presence or absence of phosphorylation sites between different natural accessions in receptor and other proteins involved in signal transduction. Elucidating the effect of phosphorylation sites affected by nsSNPs on adaptive responses represents an exciting research goal for the future.}, language = {en} } @misc{RepsilberKernTelaaretal.2010, author = {Repsilber, Dirk and Kern, Sabine and Telaar, Anna and Walzl, Gerhard and Black, Gillian F. and Selbig, Joachim and Parida, Shreemanta K. and Kaufmann, Stefan H. E. and Jacobsen, Marc}, title = {Biomarker discovery in heterogeneous tissue samples}, series = {Postprints der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe}, journal = {Postprints der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe}, number = {854}, issn = {1866-8372}, doi = {10.25932/publishup-42934}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-429343}, pages = {17}, year = {2010}, abstract = {Background: For heterogeneous tissues, such as blood, measurements of gene expression are confounded by relative proportions of cell types involved. Conclusions have to rely on estimation of gene expression signals for homogeneous cell populations, e.g. by applying micro-dissection, fluorescence activated cell sorting, or in-silico deconfounding. We studied feasibility and validity of a non-negative matrix decomposition algorithm using experimental gene expression data for blood and sorted cells from the same donor samples. Our objective was to optimize the algorithm regarding detection of differentially expressed genes and to enable its use for classification in the difficult scenario of reversely regulated genes. This would be of importance for the identification of candidate biomarkers in heterogeneous tissues. Results: Experimental data and simulation studies involving noise parameters estimated from these data revealed that for valid detection of differential gene expression, quantile normalization and use of non-log data are optimal. We demonstrate the feasibility of predicting proportions of constituting cell types from gene expression data of single samples, as a prerequisite for a deconfounding-based classification approach. Classification cross-validation errors with and without using deconfounding results are reported as well as sample-size dependencies. Implementation of the algorithm, simulation and analysis scripts are available. Conclusions: The deconfounding algorithm without decorrelation using quantile normalization on non-log data is proposed for biomarkers that are difficult to detect, and for cases where confounding by varying proportions of cell types is the suspected reason. In this case, a deconfounding ranking approach can be used as a powerful alternative to, or complement of, other statistical learning approaches to define candidate biomarkers for molecular diagnosis and prediction in biomedicine, in realistically noisy conditions and with moderate sample sizes.}, language = {en} }