% Bibliography export: publications co-authored by Dirk Walther.
%
% Conventions used in this file:
%   * One field per line; values are brace-delimited.
%   * Article entries carry the journal name in the journal field only.
%   * Page ranges use a double hyphen without surrounding spaces.
%   * Proper nouns and acronyms in titles are brace-protected so that
%     sentence-casing bibliography styles do not lowercase them.
%   * A bare number in the pages field (12, 13, 14) is what the export
%     provides; NOTE(review): it looks like a page count rather than a
%     page range or article number -- confirm before relying on it.

% Postprint, series number 830. The journal and series fields hold the same
% postprint series name; standard styles ignore both for misc entries.
@misc{KoehlBaslerLuedemannetal.2008,
  author   = {K{\"o}hl, Karin I. and Basler, Georg and L{\"u}demann, Alexander and Selbig, Joachim and Walther, Dirk},
  title    = {A plant resource and experiment management system based on the {Golm Plant Database} as a basic tool for omics research},
  series   = {Postprints der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe},
  journal  = {Postprints der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe},
  number   = {830},
  doi      = {10.25932/publishup-42759},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-427595},
  pages    = {13},
  year     = {2008},
  abstract = {Background: For omics experiments, detailed characterisation of experimental material with respect to its genetic features, its cultivation history and its treatment history is a requirement for analyses by bioinformatics tools and for publication needs. Furthermore, meta-analysis of several experiments in systems biology based approaches make it necessary to store this information in a standardised manner, preferentially in relational databases. In the Golm Plant Database System, we devised a data management system based on a classical Laboratory Information Management System combined with web-based user interfaces for data entry and retrieval to collect this information in an academic environment. Results: The database system contains modules representing the genetic features of the germplasm, the experimental conditions and the sampling details. In the germplasm module, genetically identical lines of biological material are generated by defined workflows, starting with the import workflow, followed by further workflows like genetic modification (transformation), vegetative or sexual reproduction. The latter workflows link lines and thus create pedigrees. For experiments, plant objects are generated from plant lines and united in so-called cultures, to which the cultivation conditions are linked. Materials and methods for each cultivation step are stored in a separate ACCESS database of the plant cultivation unit. For all cultures and thus every plant object, each cultivation site and the culture's arrival time at a site are logged by a barcode-scanner based system. Thus, for each plant object, all site-related parameters, e. g. automatically logged climate data, are available. These life history data and genetic information for the plant objects are linked to analytical results by the sampling module, which links sample components to plant object identifiers. This workflow uses controlled vocabulary for organs and treatments. Unique names generated by the system and barcode labels facilitate identification and management of the material. Web pages are provided as user interfaces to facilitate maintaining the system in an environment with many desktop computers and a rapidly changing user community. Web based search tools are the basis for joint use of the material by all researchers of the institute. Conclusion: The Golm Plant Database system, which is based on a relational database, collects the genetic and environmental information on plant material during its production or experimental use at the Max-Planck-Institute of Molecular Plant Physiology. It thus provides information according to the MIAME standard for the component 'Sample' in a highly standardised format. The Plant Database system thus facilitates collaborative work and allows efficient queries in data analysis for systems biology research.},
  language = {en}
}

% Journal article (Functional plant biology, volume 42, issue 7).
@article{SprengerRudackSchudomaetal.2015,
  author    = {Sprenger, Heike and Rudack, Katharina and Schudoma, Christian and Neumann, Arne and Seddig, Sylvia and Peters, Rolf and Zuther, Ellen and Kopka, Joachim and Hincha, Dirk K. and Walther, Dirk and K{\"o}hl, Karin},
  title     = {Assessment of drought tolerance and its potential yield penalty in potato},
  volume    = {42},
  journal   = {Functional plant biology : an international journal of plant function},
  number    = {7},
  publisher = {CSIRO},
  address   = {Clayton},
  issn      = {1445-4408},
  doi       = {10.1071/FP15013},
  pages     = {655--667},
  year      = {2015},
  abstract  = {Climate models predict an increased likelihood of seasonal droughts for many areas of the world. Breeding for drought tolerance could be accelerated by marker-assisted selection. As a basis for marker identification, we studied the genetic variance, predictability of field performance and potential costs of tolerance in potato (Solanum tuberosum L.). Potato produces high calories per unit of water invested, but is drought-sensitive. In 14 independent pot or field trials, 34 potato cultivars were grown under optimal and reduced water supply to determine starch yield. In an artificial dataset, we tested several stress indices for their power to distinguish tolerant and sensitive genotypes independent of their yield potential. We identified the deviation of relative starch yield from the experimental median (DRYM) as the most efficient index. DRYM corresponded qualitatively to the partial least square model-based metric of drought stress tolerance in a stress effect model. The DRYM identified significant tolerance variation in the European potato cultivar population to allow tolerance breeding and marker identification. Tolerance results from pot trials correlated with those from field trials but predicted field performance worse than field growth parameters. Drought tolerance correlated negatively with yield under optimal conditions in the field. The distribution of yield data versus DRYM indicated that tolerance can be combined with average yield potentials, thus circumventing potential yield penalties in tolerance breeding.},
  language  = {en}
}

% Thesis record.
% NOTE(review): the school field required for this entry type is absent from
% the export; the address is Potsdam -- confirm the granting institution and
% add it. The pages value keeps the German suffix from the export unchanged.
@phdthesis{Walther2012,
  author   = {Walther, Dirk},
  title    = {Bioinformatics studies of biological systems across multiple levels of molecular organization},
  address  = {Potsdam},
  pages    = {124 S.},
  year     = {2012},
  language = {en}
}

% Journal article (Frontiers in plant science, volume 3).
@article{ChristianBraginetsSchulzeetal.2012,
  author    = {Christian, Jan-Ole and Braginets, Rostyslav and Schulze, Waltraud X. and Walther, Dirk},
  title     = {Characterization and prediction of protein phosphorylation hotspots in {Arabidopsis} thaliana},
  volume    = {3},
  journal   = {Frontiers in plant science},
  publisher = {Frontiers Research Foundation},
  address   = {Lausanne},
  issn      = {1664-462X},
  doi       = {10.3389/fpls.2012.00207},
  pages     = {14},
  year      = {2012},
  abstract  = {The regulation of protein function by modulating the surface charge status via sequence-locally enriched phosphorylation sites (P-sites) in so called phosphorylation "hotspots" has gained increased attention in recent years. We set out to identify P-hotspots in the model plant Arabidopsis thaliana. We analyzed the spacing of experimentally detected P-sites within peptide-covered regions along Arabidopsis protein sequences as available from the PhosPhAt database. Confirming earlier reports (Schweiger and Lanial, 2010), we found that, indeed, P-sites tend to cluster and that distributions between serine and threonine P-sites to their respected closest next P-site differ significantly from those for tyrosine P-sites. The ability to predict P-hotspots by applying available computational P-site prediction programs that focus on identifying single P-sites was observed to be severely compromised by the inevitable interference of nearby P-sites. We devised a new approach, named HotSPotter, for the prediction of phosphorylation hotspots. HotSPotter is based primarily on local amino acid compositional preferences rather than sequence position-specific motifs and uses support vector machines as the underlying classification engine. HotSPotter correctly identified experimentally determined phosphorylation hotspots in A. thaliana with high accuracy. Applied to the Arabidopsis proteome, HotSPotter-predicted 13,677 candidate P-hotspots in 9,599 proteins corresponding to 7,847 unique genes. Hotspot containing proteins are involved predominantly in signaling processes confirming the surmised modulating role of hotspots in signaling and interaction events. Our study provides new bioinformatics means to identify phosphorylation hotspots and lays the basis for further investigating novel candidate P-hotspots. All phosphorylation hotspot annotations and predictions have been made available as part of the PhosPhAt database at http://phosphat.mpimp-golm.mpg.de.},
  language  = {en}
}

% Repository record identified by URN only (no DOI in the export).
@misc{MayChristianKempaetal.2009,
  author   = {May, Patrick and Christian, Jan-Ole and Kempa, Stefan and Walther, Dirk},
  title    = {{ChlamyCyc} : an integrative systems biology database and web-portal for {Chlamydomonas} reinhardtii},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-44947},
  year     = {2009},
  abstract = {Background: The unicellular green alga Chlamydomonas reinhardtii is an important eukaryotic model organism for the study of photosynthesis and plant growth. In the era of modern high-throughput technologies there is an imperative need to integrate large-scale data sets from high-throughput experimental techniques using computational methods and database resources to provide comprehensive information about the molecular and cellular organization of a single organism. Results: In the framework of the German Systems Biology initiative GoFORSYS, a pathway database and web-portal for Chlamydomonas (ChlamyCyc) was established, which currently features about 250 metabolic pathways with associated genes, enzymes, and compound information. ChlamyCyc was assembled using an integrative approach combining the recently published genome sequence, bioinformatics methods, and experimental data from metabolomics and proteomics experiments. We analyzed and integrated a combination of primary and secondary database resources, such as existing genome annotations from JGI, EST collections, orthology information, and MapMan classification. Conclusion: ChlamyCyc provides a curated and integrated systems biology repository that will enable and assist in systematic studies of fundamental cellular processes in Chlamydomonas. The ChlamyCyc database and web-portal is freely available under http://chlamycyc.mpimp-golm.mpg.de.},
  language = {en}
}

% Repository record identified by URN only (no DOI in the export).
@misc{DurekSchudomaWeckwerthetal.2009,
  author   = {Durek, Pawel and Schudoma, Christian and Weckwerth, Wolfram and Selbig, Joachim and Walther, Dirk},
  title    = {Detection and characterization of {3D}-signature phosphorylation site motifs and their contribution towards improved phosphorylation site prediction in proteins},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-45129},
  year     = {2009},
  abstract = {Background: Phosphorylation of proteins plays a crucial role in the regulation and activation of metabolic and signaling pathways and constitutes an important target for pharmaceutical intervention. Central to the phosphorylation process is the recognition of specific target sites by protein kinases followed by the covalent attachment of phosphate groups to the amino acids serine, threonine, or tyrosine. The experimental identification as well as computational prediction of phosphorylation sites (P-sites) has proved to be a challenging problem. Computational methods have focused primarily on extracting predictive features from the local, one-dimensional sequence information surrounding phosphorylation sites. Results: We characterized the spatial context of phosphorylation sites and assessed its usability for improved phosphorylation site predictions. We identified 750 non-redundant, experimentally verified sites with three-dimensional (3D) structural information available in the protein data bank (PDB) and grouped them according to their respective kinase family. We studied the spatial distribution of amino acids around phosphorserines, phosphothreonines, and phosphotyrosines to extract signature 3D-profiles. Characteristic spatial distributions of amino acid residue types around phosphorylation sites were indeed discernable, especially when kinase-family-specific target sites were analyzed. To test the added value of using spatial information for the computational prediction of phosphorylation sites, Support Vector Machines were applied using both sequence as well as structural information. When compared to sequence-only based prediction methods, a small but consistent performance improvement was obtained when the prediction was informed by 3D-context information. Conclusion: While local one-dimensional amino acid sequence information was observed to harbor most of the discriminatory power, spatial context information was identified as relevant for the recognition of kinases and their cognate target sites and can be used for an improved prediction of phosphorylation sites. A web-based service (Phos3D) implementing the developed structure-based P-site prediction method has been made available at http://phos3d.mpimp-golm.mpg.de.},
  language = {en}
}

% Repository record identified by URN only (no DOI in the export).
@misc{ChildsNikoloskiMayetal.2009,
  author   = {Childs, Liam H. and Nikoloski, Zoran and May, Patrick and Walther, Dirk},
  title    = {Identification and classification of {ncRNA} molecules using graph properties},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-45192},
  year     = {2009},
  abstract = {The study of non-coding RNA genes has received increased attention in recent years fuelled by accumulating evidence that larger portions of genomes than previously acknowledged are transcribed into RNA molecules of mostly unknown function, as well as the discovery of novel non-coding RNA types and functional RNA elements. Here, we demonstrate that specific properties of graphs that represent the predicted RNA secondary structure reflect functional information. We introduce a computational algorithm and an associated web-based tool (GraPPLE) for classifying non-coding RNA molecules as functional and, furthermore, into Rfam families based on their graph properties. Unlike sequence-similarity-based methods and covariance models, GraPPLE is demonstrated to be more robust with regard to increasing sequence divergence, and when combined with existing methods, leads to a significant improvement of prediction accuracy. Furthermore, graph properties identified as most informative are shown to provide an understanding as to what particular structural features render RNA molecules functional. Thus, GraPPLE may offer a valuable computational filtering tool to identify potentially interesting RNA molecules among large candidate datasets.},
  language = {en}
}

% Journal article (Plant Biotechnology Journal, volume 16, issue 4). The same
% title, author list and abstract also appear in the postprint record
% SprengerErbanSeddigetal.2018; both keys are kept.
@article{SprengerErbanSeddigetal.2017,
  author    = {Sprenger, Heike and Erban, Alexander and Seddig, Sylvia and Rudack, Katharina and Thalhammer, Anja and Le, Mai Q. and Walther, Dirk and Zuther, Ellen and K{\"o}hl, Karin I. and Kopka, Joachim and Hincha, Dirk K.},
  title     = {Metabolite and transcript markers for the prediction of potato drought tolerance},
  volume    = {16},
  journal   = {Plant Biotechnology Journal},
  number    = {4},
  publisher = {Wiley},
  address   = {Hoboken},
  issn      = {1467-7644},
  doi       = {10.1111/pbi.12840},
  pages     = {939--950},
  year      = {2017},
  abstract  = {Potato (Solanum tuberosum L.) is one of the most important food crops worldwide. Current potato varieties are highly susceptible to drought stress. In view of global climate change, selection of cultivars with improved drought tolerance and high yield potential is of paramount importance. Drought tolerance breeding of potato is currently based on direct selection according to yield and phenotypic traits and requires multiple trials under drought conditions. Marker-assisted selection (MAS) is cheaper, faster and reduces classification errors caused by noncontrolled environmental effects. We analysed 31 potato cultivars grown under optimal and reduced water supply in six independent field trials. Drought tolerance was determined as tuber starch yield. Leaf samples from young plants were screened for preselected transcript and nontargeted metabolite abundance using qRT-PCR and GC-MS profiling, respectively. Transcript marker candidates were selected from a published RNA-Seq data set. A Random Forest machine learning approach extracted metabolite and transcript markers for drought tolerance prediction with low error rates of 6\% and 9\%, respectively. Moreover, by combining transcript and metabolite markers, the prediction error was reduced to 4.3\%. Feature selection from Random Forest models allowed model minimization, yielding a minimal combination of only 20 metabolite and transcript markers that were successfully tested for their reproducibility in 16 independent agronomic field trials. We demonstrate that a minimum combination of transcript and metabolite markers sampled at early cultivation stages predicts potato yield stability under drought largely independent of seasonal and regional agronomic conditions.},
  language  = {en}
}

% Postprint, series number 673, carrying the same title, author list and
% abstract as SprengerErbanSeddigetal.2017.
@misc{SprengerErbanSeddigetal.2018,
  author   = {Sprenger, Heike and Erban, Alexander and Seddig, Sylvia and Rudack, Katharina and Thalhammer, Anja and Le, Mai Q. and Walther, Dirk and Zuther, Ellen and K{\"o}hl, Karin I. and Kopka, Joachim and Hincha, Dirk K.},
  title    = {Metabolite and transcript markers for the prediction of potato drought tolerance},
  series   = {Postprints der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe},
  journal  = {Postprints der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe},
  number   = {673},
  issn     = {1866-8372},
  doi      = {10.25932/publishup-42463},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-424630},
  pages    = {12},
  year     = {2018},
  abstract = {Potato (Solanum tuberosum L.) is one of the most important food crops worldwide. Current potato varieties are highly susceptible to drought stress. In view of global climate change, selection of cultivars with improved drought tolerance and high yield potential is of paramount importance. Drought tolerance breeding of potato is currently based on direct selection according to yield and phenotypic traits and requires multiple trials under drought conditions. Marker-assisted selection (MAS) is cheaper, faster and reduces classification errors caused by noncontrolled environmental effects. We analysed 31 potato cultivars grown under optimal and reduced water supply in six independent field trials. Drought tolerance was determined as tuber starch yield. Leaf samples from young plants were screened for preselected transcript and nontargeted metabolite abundance using qRT-PCR and GC-MS profiling, respectively. Transcript marker candidates were selected from a published RNA-Seq data set. A Random Forest machine learning approach extracted metabolite and transcript markers for drought tolerance prediction with low error rates of 6\% and 9\%, respectively. Moreover, by combining transcript and metabolite markers, the prediction error was reduced to 4.3\%. Feature selection from Random Forest models allowed model minimization, yielding a minimal combination of only 20 metabolite and transcript markers that were successfully tested for their reproducibility in 16 independent agronomic field trials. We demonstrate that a minimum combination of transcript and metabolite markers sampled at early cultivation stages predicts potato yield stability under drought largely independent of seasonal and regional agronomic conditions.},
  language = {en}
}

% Journal article (Frontiers in human neuroscience, volume 12).
@article{StoesselStellmannWillingetal.2018,
  author    = {Stoessel, Daniel and Stellmann, Jan-Patrick and Willing, Anne and Behrens, Birte and Rosenkranz, Sina C. and Hodecker, Sibylle C. and Stuerner, Klarissa H. and Reinhardt, Stefanie and Fleischer, Sabine and Deuschle, Christian and Maetzler, Walter and Berg, Daniela and Heesen, Christoph and Walther, Dirk and Schauer, Nicolas and Friese, Manuel A. and Pless, Ole},
  title     = {Metabolomic Profiles for Primary Progressive Multiple Sclerosis Stratification and Disease Course Monitoring},
  volume    = {12},
  journal   = {Frontiers in human neuroscience},
  publisher = {Frontiers Research Foundation},
  address   = {Lausanne},
  issn      = {1662-5161},
  doi       = {10.3389/fnhum.2018.00226},
  pages     = {13},
  year      = {2018},
  abstract  = {Primary progressive multiple sclerosis (PPMS) shows a highly variable disease progression with poor prognosis and a characteristic accumulation of disabilities in patients. These hallmarks of PPMS make it difficult to diagnose and currently impossible to efficiently treat. This study aimed to identify plasma metabolite profiles that allow diagnosis of PPMS and its differentiation from the relapsing remitting subtype (RRMS), primary neurodegenerative disease (Parkinson's disease, PD), and healthy controls (HCs) and that significantly change during the disease course and could serve as surrogate markers of multiple sclerosis (MS)-associated neurodegeneration over time. We applied untargeted high-resolution metabolomics to plasma samples to identify PPMS-specific signatures, validated our findings in independent sex- and age-matched PPMS and HC cohorts and built discriminatory models by partial least square discriminant analysis (PLS-DA). This signature was compared to sex- and age-matched RRMS patients, to patients with PD and HC. Finally, we investigated these metabolites in a longitudinal cohort of PPMS patients over a 24-month period. PLS-DA yielded predictive models for classification along with a set of 20 PPMS-specific informative metabolite markers. These metabolites suggest disease-specific alterations in glycerophospholipid and linoleic acid pathways. Notably, the glycerophospholipid LysoPC(20:0) significantly decreased during the observation period. These findings show potential for diagnosis and disease course monitoring, and might serve as biomarkers to assess treatment efficacy in future clinical trials for neuroprotective MS therapies.},
  language  = {en}
}