% Bibliography export: ten entries, each listing Walther, Dirk among its authors.
%
% Conventions used in this file:
%   - classic BibTeX field names (journal, address), one field per line;
%   - acronyms and proper nouns in titles are brace-protected so that
%     sentence-casing styles do not lower-case them;
%   - page ranges use a double hyphen without surrounding spaces.
%
% NOTE(review): the pages values 14, 19 and 14 on the three articles without a
% page range, and "124 S." on the thesis, look like page counts rather than
% page locations -- confirm against the publishers' records before relying on
% them in a rendered bibliography.

@misc{ChildsNikoloskiMayetal.2009,
  author   = {Childs, Liam H. and Nikoloski, Zoran and May, Patrick and Walther, Dirk},
  title    = {Identification and classification of {ncRNA} molecules using graph properties},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-45192},
  year     = {2009},
  abstract = {The study of non-coding RNA genes has received increased attention in recent years fuelled by accumulating evidence that larger portions of genomes than previously acknowledged are transcribed into RNA molecules of mostly unknown function, as well as the discovery of novel non-coding RNA types and functional RNA elements. Here, we demonstrate that specific properties of graphs that represent the predicted RNA secondary structure reflect functional information. We introduce a computational algorithm and an associated web-based tool (GraPPLE) for classifying non-coding RNA molecules as functional and, furthermore, into Rfam families based on their graph properties. Unlike sequence-similarity-based methods and covariance models, GraPPLE is demonstrated to be more robust with regard to increasing sequence divergence, and when combined with existing methods, leads to a significant improvement of prediction accuracy. Furthermore, graph properties identified as most informative are shown to provide an understanding as to what particular structural features render RNA molecules functional. Thus, GraPPLE may offer a valuable computational filtering tool to identify potentially interesting RNA molecules among large candidate datasets.},
  language = {en}
}

@article{ChildsWituckaWallGuentheretal.2010,
  author   = {Childs, Liam H. and Witucka-Wall, Hanna and Guenther, Torsten and Sulpice, Ronan and Korff, Maria V. and Stitt, Mark and Walther, Dirk and Schmid, Karl J. and Altmann, Thomas},
  title    = {Single feature polymorphism {(SFP)-based} selective sweep identification and association mapping of growth-related metabolic traits in {Arabidopsis} thaliana},
  issn     = {1471-2164},
  doi      = {10.1186/1471-2164-11-188},
  year     = {2010},
  abstract = {Background: Natural accessions of Arabidopsis thaliana are characterized by a high level of phenotypic variation that can be used to investigate the extent and mode of selection on the primary metabolic traits. A collection of 54 A. thaliana natural accession-derived lines were subjected to deep genotyping through Single Feature Polymorphism (SFP) detection via genomic DNA hybridization to Arabidopsis Tiling 1.0 Arrays for the detection of selective sweeps, and identification of associations between sweep regions and growth-related metabolic traits. Results: A total of 1,072,557 high-quality SFPs were detected and indications for 3,943 deletions and 1,007 duplications were obtained. A significantly lower than expected SFP frequency was observed in protein-, rRNA-, and tRNA-coding regions and in non-repetitive intergenic regions, while pseudogenes, transposons, and non-coding RNA genes are enriched with SFPs. Gene families involved in plant defence or in signalling were identified as highly polymorphic, while several other families including transcription factors are depleted of SFPs. 198 significant associations between metabolic genes and 9 metabolic and growth-related phenotypic traits were detected with annotation hinting at the nature of the relationship. Five significant selective sweep regions were also detected of which one associated significantly with a metabolic trait. Conclusions: We generated a high density polymorphism map for 54 A. thaliana accessions that highlights the variability of resistance genes across geographic ranges and used it to identify selective sweeps and associations between metabolic genes and metabolic phenotypes. Several associations show a clear biological relationship, while many remain requiring further investigation.},
  language = {en}
}

@article{ChristianBraginetsSchulzeetal.2012,
  author    = {Christian, Jan-Ole and Braginets, Rostyslav and Schulze, Waltraud X. and Walther, Dirk},
  title     = {Characterization and prediction of protein phosphorylation hotspots in {Arabidopsis} thaliana},
  journal   = {Frontiers in plant science},
  volume    = {3},
  publisher = {Frontiers Research Foundation},
  address   = {Lausanne},
  issn      = {1664-462X},
  doi       = {10.3389/fpls.2012.00207},
  pages     = {14},
  year      = {2012},
  abstract  = {The regulation of protein function by modulating the surface charge status via sequence-locally enriched phosphorylation sites (P-sites) in so called phosphorylation "hotspots" has gained increased attention in recent years. We set out to identify P-hotspots in the model plant Arabidopsis thaliana. We analyzed the spacing of experimentally detected P-sites within peptide-covered regions along Arabidopsis protein sequences as available from the PhosPhAt database. Confirming earlier reports (Schweiger and Linial, 2010), we found that, indeed, P-sites tend to cluster and that distributions between serine and threonine P-sites to their respected closest next P-site differ significantly from those for tyrosine P-sites. The ability to predict P-hotspots by applying available computational P-site prediction programs that focus on identifying single P-sites was observed to be severely compromised by the inevitable interference of nearby P-sites. We devised a new approach, named HotSPotter, for the prediction of phosphorylation hotspots. HotSPotter is based primarily on local amino acid compositional preferences rather than sequence position-specific motifs and uses support vector machines as the underlying classification engine. HotSPotter correctly identified experimentally determined phosphorylation hotspots in A. thaliana with high accuracy. Applied to the Arabidopsis proteome, HotSPotter-predicted 13,677 candidate P-hotspots in 9,599 proteins corresponding to 7,847 unique genes. Hotspot containing proteins are involved predominantly in signaling processes confirming the surmised modulating role of hotspots in signaling and interaction events. Our study provides new bioinformatics means to identify phosphorylation hotspots and lays the basis for further investigating novel candidate P-hotspots. All phosphorylation hotspot annotations and predictions have been made available as part of the PhosPhAt database at http://phosphat.mpimp-golm.mpg.de.},
  language  = {en}
}

@misc{DurekSchudomaWeckwerthetal.2009,
  author   = {Durek, Pawel and Schudoma, Christian and Weckwerth, Wolfram and Selbig, Joachim and Walther, Dirk},
  title    = {Detection and characterization of {3D-signature} phosphorylation site motifs and their contribution towards improved phosphorylation site prediction in proteins},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-45129},
  year     = {2009},
  abstract = {Background: Phosphorylation of proteins plays a crucial role in the regulation and activation of metabolic and signaling pathways and constitutes an important target for pharmaceutical intervention. Central to the phosphorylation process is the recognition of specific target sites by protein kinases followed by the covalent attachment of phosphate groups to the amino acids serine, threonine, or tyrosine. The experimental identification as well as computational prediction of phosphorylation sites (P-sites) has proved to be a challenging problem. Computational methods have focused primarily on extracting predictive features from the local, one-dimensional sequence information surrounding phosphorylation sites. Results: We characterized the spatial context of phosphorylation sites and assessed its usability for improved phosphorylation site predictions. We identified 750 non-redundant, experimentally verified sites with three-dimensional (3D) structural information available in the protein data bank (PDB) and grouped them according to their respective kinase family. We studied the spatial distribution of amino acids around phosphorserines, phosphothreonines, and phosphotyrosines to extract signature 3D-profiles. Characteristic spatial distributions of amino acid residue types around phosphorylation sites were indeed discernable, especially when kinase-family-specific target sites were analyzed. To test the added value of using spatial information for the computational prediction of phosphorylation sites, Support Vector Machines were applied using both sequence as well as structural information. When compared to sequence-only based prediction methods, a small but consistent performance improvement was obtained when the prediction was informed by 3D-context information. Conclusion: While local one-dimensional amino acid sequence information was observed to harbor most of the discriminatory power, spatial context information was identified as relevant for the recognition of kinases and their cognate target sites and can be used for an improved prediction of phosphorylation sites. A web-based service (Phos3D) implementing the developed structure-based P-site prediction method has been made available at http://phos3d.mpimp-golm.mpg.de.},
  language = {en}
}

@misc{MayChristianKempaetal.2009,
  author   = {May, Patrick and Christian, Jan-Ole and Kempa, Stefan and Walther, Dirk},
  title    = {{ChlamyCyc}: an integrative systems biology database and web-portal for {Chlamydomonas} reinhardtii},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-44947},
  year     = {2009},
  abstract = {Background: The unicellular green alga Chlamydomonas reinhardtii is an important eukaryotic model organism for the study of photosynthesis and plant growth. In the era of modern high-throughput technologies there is an imperative need to integrate large-scale data sets from high-throughput experimental techniques using computational methods and database resources to provide comprehensive information about the molecular and cellular organization of a single organism. Results: In the framework of the German Systems Biology initiative GoFORSYS, a pathway database and web-portal for Chlamydomonas (ChlamyCyc) was established, which currently features about 250 metabolic pathways with associated genes, enzymes, and compound information. ChlamyCyc was assembled using an integrative approach combining the recently published genome sequence, bioinformatics methods, and experimental data from metabolomics and proteomics experiments. We analyzed and integrated a combination of primary and secondary database resources, such as existing genome annotations from JGI, EST collections, orthology information, and MapMan classification. Conclusion: ChlamyCyc provides a curated and integrated systems biology repository that will enable and assist in systematic studies of fundamental cellular processes in Chlamydomonas. The ChlamyCyc database and web-portal is freely available under http://chlamycyc.mpimp-golm.mpg.de.},
  language = {en}
}

@article{RianoPachonKleessenNeigenfindetal.2010,
  author    = {Ria{\~n}o-Pach{\'o}n, Diego Mauricio and Kleessen, Sabrina and Neigenfind, Jost and Durek, Pawel and Weber, Elke and Engelsberger, Wolfgang R. and Walther, Dirk and Selbig, Joachim and Schulze, Waltraud X. and Kersten, Birgit},
  title     = {Proteome-wide survey of phosphorylation patterns affected by nuclear {DNA} polymorphisms in {Arabidopsis} thaliana},
  journal   = {BMC Genomics},
  volume    = {11},
  publisher = {Biomed Central},
  address   = {London},
  issn      = {1471-2164},
  doi       = {10.1186/1471-2164-11-411},
  pages     = {19},
  year      = {2010},
  abstract  = {Background: Protein phosphorylation is an important post-translational modification influencing many aspects of dynamic cellular behavior. Site-specific phosphorylation of amino acid residues serine, threonine, and tyrosine can have profound effects on protein structure, activity, stability, and interaction with other biomolecules. Phosphorylation sites can be affected in diverse ways in members of any species, one such way is through single nucleotide polymorphisms (SNPs). The availability of large numbers of experimentally identified phosphorylation sites, and of natural variation datasets in Arabidopsis thaliana prompted us to analyze the effect of non-synonymous SNPs (nsSNPs) onto phosphorylation sites. Results: From the analyses of 7,178 experimentally identified phosphorylation sites we found that: (i) Proteins with multiple phosphorylation sites occur more often than expected by chance. (ii) Phosphorylation hotspots show a preference to be located outside conserved domains. (iii) nsSNPs affected experimental phosphorylation sites as much as the corresponding non-phosphorylated amino acid residues. (iv) Losses of experimental phosphorylation sites by nsSNPs were identified in 86 A. thaliana proteins, among them receptor proteins were overrepresented. These results were confirmed by similar analyses of predicted phosphorylation sites in A. thaliana. In addition, predicted threonine phosphorylation sites showed a significant enrichment of nsSNPs towards asparagines and a significant depletion of the synonymous substitution. Proteins in which predicted phosphorylation sites were affected by nsSNPs (loss and gain), were determined to be mainly receptor proteins, stress response proteins and proteins involved in nucleotide and protein binding. Proteins involved in metabolism, catalytic activity and biosynthesis were less affected. Conclusions: We analyzed more than 7,100 experimentally identified phosphorylation sites in almost 4,300 protein-coding loci in silico, thus constituting the largest phosphoproteomics dataset for A. thaliana available to date. Our findings suggest a relatively high variability in the presence or absence of phosphorylation sites between different natural accessions in receptor and other proteins involved in signal transduction. Elucidating the effect of phosphorylation sites affected by nsSNPs on adaptive responses represents an exciting research goal for the future.},
  language  = {en}
}

@article{SchudomaLarhlimiWalther2011,
  author    = {Schudoma, Christian and Larhlimi, Abdelhalim and Walther, Dirk},
  title     = {The influence of the local sequence environment on {RNA} loop structures},
  journal   = {RNA : a publication of the RNA Society},
  volume    = {17},
  number    = {7},
  publisher = {Cold Spring Harbor Laboratory Press},
  address   = {Cold Spring Harbor, NY},
  issn      = {1355-8382},
  doi       = {10.1261/rna.2550211},
  pages     = {1247--1257},
  year      = {2011},
  abstract  = {RNA folding is assumed to be a hierarchical process. The secondary structure of an RNA molecule, signified by base-pairing and stacking interactions between the paired bases, is formed first. Subsequently, the RNA molecule adopts an energetically favorable three-dimensional conformation in the structural space determined mainly by the rotational degrees of freedom associated with the backbone of regions of unpaired nucleotides (loops). To what extent the backbone conformation of RNA loops also results from interactions within the local sequence context or rather follows global optimization constraints alone has not been addressed yet. Because the majority of base stacking interactions are exerted locally, a critical influence of local sequence on local structure appears plausible. Thus, local loop structure ought to be predictable, at least in part, from the local sequence context alone. To test this hypothesis, we used Random Forests on a nonredundant data set of unpaired nucleotides extracted from 97 X-ray structures from the Protein Data Bank (PDB) to predict discrete backbone angle conformations given by the discretized eta/theta-pseudo-torsional space. Predictions on balanced sets with four to six conformational classes using local sequence information yielded average accuracies of up to 55\%, thus significantly better than expected by chance (17\%-25\%). Bases close to the central nucleotide appear to be most tightly linked to its conformation. Our results suggest that RNA loop structure does not only depend on long-range base-pairing interactions; instead, it appears that local sequence context exerts a significant influence on the formation of the local loop structure.},
  language  = {en}
}

@article{SprengerRudackSchudomaetal.2015,
  author    = {Sprenger, Heike and Rudack, Katharina and Schudoma, Christian and Neumann, Arne and Seddig, Sylvia and Peters, Rolf and Zuther, Ellen and Kopka, Joachim and Hincha, Dirk K. and Walther, Dirk and Koehl, Karin},
  title     = {Assessment of drought tolerance and its potential yield penalty in potato},
  journal   = {Functional plant biology : an international journal of plant function},
  volume    = {42},
  number    = {7},
  publisher = {CSIRO},
  address   = {Clayton},
  issn      = {1445-4408},
  doi       = {10.1071/FP15013},
  pages     = {655--667},
  year      = {2015},
  abstract  = {Climate models predict an increased likelihood of seasonal droughts for many areas of the world. Breeding for drought tolerance could be accelerated by marker-assisted selection. As a basis for marker identification, we studied the genetic variance, predictability of field performance and potential costs of tolerance in potato (Solanum tuberosum L.). Potato produces high calories per unit of water invested, but is drought-sensitive. In 14 independent pot or field trials, 34 potato cultivars were grown under optimal and reduced water supply to determine starch yield. In an artificial dataset, we tested several stress indices for their power to distinguish tolerant and sensitive genotypes independent of their yield potential. We identified the deviation of relative starch yield from the experimental median (DRYM) as the most efficient index. DRYM corresponded qualitatively to the partial least square model-based metric of drought stress tolerance in a stress effect model. The DRYM identified significant tolerance variation in the European potato cultivar population to allow tolerance breeding and marker identification. Tolerance results from pot trials correlated with those from field trials but predicted field performance worse than field growth parameters. Drought tolerance correlated negatively with yield under optimal conditions in the field. The distribution of yield data versus DRYM indicated that tolerance can be combined with average yield potentials, thus circumventing potential yield penalties in tolerance breeding.},
  language  = {en}
}

@article{StoesselSchultedosSantosetal.2018,
  author    = {Stoessel, Daniel and Schulte, Claudia and dos Santos, Marcia C. Teixeira and Scheller, Dieter and Rebollo-Mesa, Irene and Deuschle, Christian and Walther, Dirk and Schauer, Nicolas and Berg, Daniela and da Costa, Andre Nogueira and Maetzler, Walter},
  title     = {Promising Metabolite Profiles in the Plasma and {CSF} of Early Clinical {Parkinson's} Disease},
  journal   = {Frontiers in Aging Neuroscience},
  volume    = {10},
  publisher = {Frontiers Research Foundation},
  address   = {Lausanne},
  issn      = {1663-4365},
  doi       = {10.3389/fnagi.2018.00051},
  pages     = {14},
  year      = {2018},
  abstract  = {Parkinson's disease (PD) shows high heterogeneity with regard to the underlying molecular pathogenesis involving multiple pathways and mechanisms. Diagnosis is still challenging and rests entirely on clinical features. Thus, there is an urgent need for robust diagnostic biofluid markers. Untargeted metabolomics allows establishing low-molecular compound biomarkers in a wide range of complex diseases by the measurement of various molecular classes in biofluids such as blood plasma, serum, and cerebrospinal fluid (CSF). Here, we applied untargeted high-resolution mass spectrometry to determine plasma and CSF metabolite profiles. We semiquantitatively determined small-molecule levels (<= 1.5 kDa) in the plasma and CSF from early PD patients (disease duration 0-4 years; n = 80 and 40, respectively), and sex- and age-matched controls (n = 76 and 38, respectively). We performed statistical analyses utilizing partial least square and random forest analysis with a 70/30 training and testing split approach, leading to the identification of 20 promising plasma and 14 CSF metabolites. These metabolites differentiated the test set with an AUC of 0.8 (plasma) and 0.9 (CSF). Characteristics of the metabolites indicate perturbations in the glycerophospholipid, sphingolipid, and amino acid metabolism in PD, which underscores the high power of metabolomic approaches. Further studies will enable to develop a potential metabolite-based biomarker panel specific for PD.},
  language  = {en}
}

% NOTE(review): this thesis entry has no school field, which classic BibTeX
% styles require for this entry type -- add the degree-granting institution
% once confirmed.
@phdthesis{Walther2012,
  author   = {Walther, Dirk},
  title    = {Bioinformatics studies of biological systems across multiple levels of molecular organization},
  address  = {Potsdam},
  pages    = {124 S.},
  year     = {2012},
  language = {en}
}