% Bibliography database: one entry per work, one field per line.
% Abstract text is reproduced verbatim from the original export.
% Acronyms and proper nouns in titles are brace-protected so that
% sentence-casing bibliography styles do not lowercase them.

@misc{ChildsNikoloskiMayetal.2009,
  author = {Childs, Liam H. and Nikoloski, Zoran and May, Patrick and Walther, Dirk},
  title = {Identification and classification of {ncRNA} molecules using graph properties},
  url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-45192},
  year = {2009},
  abstract = {The study of non-coding RNA genes has received increased attention in recent years fuelled by accumulating evidence that larger portions of genomes than previously acknowledged are transcribed into RNA molecules of mostly unknown function, as well as the discovery of novel non-coding RNA types and functional RNA elements. Here, we demonstrate that specific properties of graphs that represent the predicted RNA secondary structure reflect functional information. We introduce a computational algorithm and an associated web-based tool (GraPPLE) for classifying non-coding RNA molecules as functional and, furthermore, into Rfam families based on their graph properties. Unlike sequence-similarity-based methods and covariance models, GraPPLE is demonstrated to be more robust with regard to increasing sequence divergence, and when combined with existing methods, leads to a significant improvement of prediction accuracy. Furthermore, graph properties identified as most informative are shown to provide an understanding as to what particular structural features render RNA molecules functional. Thus, GraPPLE may offer a valuable computational filtering tool to identify potentially interesting RNA molecules among large candidate datasets.},
  language = {en}
}

% journal, series and volume filled in for this article: its ISSN 1471-2164
% and DOI prefix 10.1186/1471-2164-11- match the BMC Genomics volume 11
% entry further down in this file. The stray space in "growth- related"
% in the title was removed.
@article{ChildsWituckaWallGuentheretal.2010,
  author = {Childs, Liam H. and Witucka-Wall, Hanna and Guenther, Torsten and Sulpice, Ronan and Korff, Maria V. and Stitt, Mark and Walther, Dirk and Schmid, Karl J. and Altmann, Thomas},
  title = {Single feature polymorphism ({SFP})-based selective sweep identification and association mapping of growth-related metabolic traits in {Arabidopsis} thaliana},
  series = {BMC Genomics},
  volume = {11},
  journal = {BMC Genomics},
  issn = {1471-2164},
  doi = {10.1186/1471-2164-11-188},
  year = {2010},
  abstract = {Background: Natural accessions of Arabidopsis thaliana are characterized by a high level of phenotypic variation that can be used to investigate the extent and mode of selection on the primary metabolic traits. A collection of 54 A. thaliana natural accession-derived lines were subjected to deep genotyping through Single Feature Polymorphism (SFP) detection via genomic DNA hybridization to Arabidopsis Tiling 1.0 Arrays for the detection of selective sweeps, and identification of associations between sweep regions and growth-related metabolic traits. Results: A total of 1,072,557 high-quality SFPs were detected and indications for 3,943 deletions and 1,007 duplications were obtained. A significantly lower than expected SFP frequency was observed in protein-, rRNA-, and tRNA-coding regions and in non- repetitive intergenic regions, while pseudogenes, transposons, and non-coding RNA genes are enriched with SFPs. Gene families involved in plant defence or in signalling were identified as highly polymorphic, while several other families including transcription factors are depleted of SFPs. 198 significant associations between metabolic genes and 9 metabolic and growth-related phenotypic traits were detected with annotation hinting at the nature of the relationship. Five significant selective sweep regions were also detected of which one associated significantly with a metabolic trait. Conclusions: We generated a high density polymorphism map for 54 A. thaliana accessions that highlights the variability of resistance genes across geographic ranges and used it to identify selective sweeps and associations between metabolic genes and metabolic phenotypes. Several associations show a clear biological relationship, while many remain requiring further investigation.},
  language = {en}
}

@article{ChristianBraginetsSchulzeetal.2012,
  author = {Christian, Jan-Ole and Braginets, Rostyslav and Schulze, Waltraud X. and Walther, Dirk},
  title = {Characterization and prediction of protein phosphorylation hotspots in {Arabidopsis} thaliana},
  series = {Frontiers in plant science},
  volume = {3},
  journal = {Frontiers in plant science},
  publisher = {Frontiers Research Foundation},
  address = {Lausanne},
  issn = {1664-462X},
  doi = {10.3389/fpls.2012.00207},
  pages = {14},
  year = {2012},
  abstract = {The regulation of protein function by modulating the surface charge status via sequence-locally enriched phosphorylation sites (P-sites) in so called phosphorylation "hotspots" has gained increased attention in recent years. We set out to identify P-hotspots in the model plant Arabidopsis thaliana. We analyzed the spacing of experimentally detected P-sites within peptide-covered regions along Arabidopsis protein sequences as available from the PhosPhAt database. Confirming earlier reports (Schweiger and Lanial, 2010), we found that, indeed, P-sites tend to cluster and that distributions between serine and threonine P-sites to their respected closest next P-site differ significantly from those for tyrosine P-sites. The ability to predict P-hotspots by applying available computational P-site prediction programs that focus on identifying single P-sites was observed to be severely compromised by the inevitable interference of nearby P-sites. We devised a new approach, named HotSPotter, for the prediction of phosphorylation hotspots. HotSPotter is based primarily on local amino acid compositional preferences rather than sequence position-specific motifs and uses support vector machines as the underlying classification engine. HotSPotter correctly identified experimentally determined phosphorylation hotspots in A. thaliana with high accuracy. Applied to the Arabidopsis proteome, HotSPotter-predicted 13,677 candidate P-hotspots in 9,599 proteins corresponding to 7,847 unique genes. Hotspot containing proteins are involved predominantly in signaling processes confirming the surmised modulating role of hotspots in signaling and interaction events. Our study provides new bioinformatics means to identify phosphorylation hotspots and lays the basis for further investigating novel candidate P-hotspots. All phosphorylation hotspot annotations and predictions have been made available as part of the PhosPhAt database at http://phosphat.mpimp-golm.mpg.de.},
  language = {en}
}

@misc{DurekSchudomaWeckwerthetal.2009,
  author = {Durek, Pawel and Schudoma, Christian and Weckwerth, Wolfram and Selbig, Joachim and Walther, Dirk},
  title = {Detection and characterization of {3D}-signature phosphorylation site motifs and their contribution towards improved phosphorylation site prediction in proteins},
  url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-45129},
  year = {2009},
  abstract = {Background: Phosphorylation of proteins plays a crucial role in the regulation and activation of metabolic and signaling pathways and constitutes an important target for pharmaceutical intervention. Central to the phosphorylation process is the recognition of specific target sites by protein kinases followed by the covalent attachment of phosphate groups to the amino acids serine, threonine, or tyrosine. The experimental identification as well as computational prediction of phosphorylation sites (P-sites) has proved to be a challenging problem. Computational methods have focused primarily on extracting predictive features from the local, one-dimensional sequence information surrounding phosphorylation sites. Results: We characterized the spatial context of phosphorylation sites and assessed its usability for improved phosphorylation site predictions. We identified 750 non-redundant, experimentally verified sites with three-dimensional (3D) structural information available in the protein data bank (PDB) and grouped them according to their respective kinase family. We studied the spatial distribution of amino acids around phosphorserines, phosphothreonines, and phosphotyrosines to extract signature 3D-profiles. Characteristic spatial distributions of amino acid residue types around phosphorylation sites were indeed discernable, especially when kinase-family-specific target sites were analyzed. To test the added value of using spatial information for the computational prediction of phosphorylation sites, Support Vector Machines were applied using both sequence as well as structural information. When compared to sequence-only based prediction methods, a small but consistent performance improvement was obtained when the prediction was informed by 3D-context information. Conclusion: While local one-dimensional amino acid sequence information was observed to harbor most of the discriminatory power, spatial context information was identified as relevant for the recognition of kinases and their cognate target sites and can be used for an improved prediction of phosphorylation sites. A web-based service (Phos3D) implementing the developed structurebased P-site prediction method has been made available at http://phos3d.mpimp-golm.mpg.de.},
  language = {en}
}

@article{HoangGryzikHoppeetal.2022,
  author = {Hoang, Yen and Gryzik, Stefanie and Hoppe, Ines and Rybak, Alexander and Sch{\"a}dlich, Martin and Kadner, Isabelle and Walther, Dirk and Vera, Julio and Radbruch, Andreas and Groth, Detlef and Baumgart, Sabine and Baumgrass, Ria},
  title = {{PRI}: Re-analysis of a public mass cytometry dataset reveals patterns of effective tumor treatments},
  series = {Frontiers in immunology},
  volume = {13},
  journal = {Frontiers in immunology},
  publisher = {Frontiers Media},
  address = {Lausanne},
  issn = {1664-3224},
  doi = {10.3389/fimmu.2022.849329},
  pages = {9},
  year = {2022},
  abstract = {Recently, mass cytometry has enabled quantification of up to 50 parameters for millions of cells per sample. It remains a challenge to analyze such high-dimensional data to exploit the richness of the inherent information, even though many valuable new analysis tools have already been developed. We propose a novel algorithm "pattern recognition of immune cells (PRI)" to tackle these high-dimensional protein combinations in the data. PRI is a tool for the analysis and visualization of cytometry data based on a three or more-parametric binning approach, feature engineering of bin properties of multivariate cell data, and a pseudo-multiparametric visualization. Using a publicly available mass cytometry dataset, we proved that reproducible feature engineering and intuitive understanding of the generated bin plots are helpful hallmarks for re-analysis with PRI. In the CD4(+)T cell population analyzed, PRI revealed two bin-plot patterns (CD90/CD44/CD86 and CD90/CD44/CD27) and 20 bin plot features for threshold-independent classification of mice concerning ineffective and effective tumor treatment. In addition, PRI mapped cell subsets regarding co-expression of the proliferation marker Ki67 with two major transcription factors and further delineated a specific Th1 cell subset. All these results demonstrate the added insights that can be obtained using the non-cluster-based tool PRI for re-analyses of high-dimensional cytometric data.},
  language = {en}
}

@article{KnoxBrownRindfleischGuentheretal.2020,
  author = {Knox-Brown, Patrick and Rindfleisch, Tobias and G{\"u}nther, Anne and Balow, Kim and Bremer, Anne and Walther, Dirk and Miettinen, Markus S. and Hincha, Dirk K. and Thalhammer, Anja},
  title = {Similar Yet Different},
  series = {International Journal of Molecular Sciences},
  volume = {21},
  journal = {International Journal of Molecular Sciences},
  number = {8},
  publisher = {Molecular Diversity Preservation International},
  address = {Basel},
  issn = {1422-0067},
  doi = {10.3390/ijms21082794},
  pages = {25},
  year = {2020},
  abstract = {The importance of intrinsically disordered late embryogenesis abundant (LEA) proteins in the tolerance to abiotic stresses involving cellular dehydration is undisputed. While structural transitions of LEA proteins in response to changes in water availability are commonly observed and several molecular functions have been suggested, a systematic, comprehensive and comparative study of possible underlying sequence-structure-function relationships is still lacking. We performed molecular dynamics (MD) simulations as well as spectroscopic and light scattering experiments to characterize six members of two distinct, lowly homologous clades of LEA_4 family proteins from Arabidopsis thaliana. We compared structural and functional characteristics to elucidate to what degree structure and function are encoded in LEA protein sequences and complemented these findings with physicochemical properties identified in a systematic bioinformatics study of the entire Arabidopsis thaliana LEA_4 family. Our results demonstrate that although the six experimentally characterized LEA_4 proteins have similar structural and functional characteristics, differences concerning their folding propensity and membrane stabilization capacity during a freeze/thaw cycle are obvious. These differences cannot be easily attributed to sequence conservation, simple physicochemical characteristics or the abundance of sequence motifs. Moreover, the folding propensity does not appear to be correlated with membrane stabilization capacity. Therefore, the refinement of LEA_4 structural and functional properties is likely encoded in specific patterns of their physicochemical characteristics.},
  language = {en}
}

% Postprint of the article directly above. The key carries an "a" suffix
% because the article and this postprint previously shared one key, and
% BibTeX rejects repeated keys (the second entry could not be cited).
@misc{KnoxBrownRindfleischGuentheretal.2020a,
  author = {Knox-Brown, Patrick and Rindfleisch, Tobias and G{\"u}nther, Anne and Balow, Kim and Bremer, Anne and Walther, Dirk and Miettinen, Markus S. and Hincha, Dirk K. and Thalhammer, Anja},
  title = {Similar Yet Different},
  series = {Postprints der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe},
  journal = {Postprints der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe},
  number = {901},
  issn = {1866-8372},
  doi = {10.25932/publishup-46941},
  url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-469419},
  pages = {27},
  year = {2020},
  abstract = {The importance of intrinsically disordered late embryogenesis abundant (LEA) proteins in the tolerance to abiotic stresses involving cellular dehydration is undisputed. While structural transitions of LEA proteins in response to changes in water availability are commonly observed and several molecular functions have been suggested, a systematic, comprehensive and comparative study of possible underlying sequence-structure-function relationships is still lacking. We performed molecular dynamics (MD) simulations as well as spectroscopic and light scattering experiments to characterize six members of two distinct, lowly homologous clades of LEA_4 family proteins from Arabidopsis thaliana. We compared structural and functional characteristics to elucidate to what degree structure and function are encoded in LEA protein sequences and complemented these findings with physicochemical properties identified in a systematic bioinformatics study of the entire Arabidopsis thaliana LEA_4 family. Our results demonstrate that although the six experimentally characterized LEA_4 proteins have similar structural and functional characteristics, differences concerning their folding propensity and membrane stabilization capacity during a freeze/thaw cycle are obvious. These differences cannot be easily attributed to sequence conservation, simple physicochemical characteristics or the abundance of sequence motifs. Moreover, the folding propensity does not appear to be correlated with membrane stabilization capacity. Therefore, the refinement of LEA_4 structural and functional properties is likely encoded in specific patterns of their physicochemical characteristics.},
  language = {en}
}

% Series name spelled with the hyphen, and the series ISSN added, so that
% this entry agrees with the other entry of the same postprint series.
@misc{KoehlBaslerLuedemannetal.2008,
  author = {K{\"o}hl, Karin I. and Basler, Georg and L{\"u}demann, Alexander and Selbig, Joachim and Walther, Dirk},
  title = {A plant resource and experiment management system based on the {Golm Plant Database} as a basic tool for omics research},
  series = {Postprints der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe},
  journal = {Postprints der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe},
  number = {830},
  issn = {1866-8372},
  doi = {10.25932/publishup-42759},
  url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-427595},
  pages = {13},
  year = {2008},
  abstract = {Background: For omics experiments, detailed characterisation of experimental material with respect to its genetic features, its cultivation history and its treatment history is a requirement for analyses by bioinformatics tools and for publication needs. Furthermore, meta-analysis of several experiments in systems biology based approaches make it necessary to store this information in a standardised manner, preferentially in relational databases. In the Golm Plant Database System, we devised a data management system based on a classical Laboratory Information Management System combined with web-based user interfaces for data entry and retrieval to collect this information in an academic environment. Results: The database system contains modules representing the genetic features of the germplasm, the experimental conditions and the sampling details. In the germplasm module, genetically identical lines of biological material are generated by defined workflows, starting with the import workflow, followed by further workflows like genetic modification (transformation), vegetative or sexual reproduction. The latter workflows link lines and thus create pedigrees. For experiments, plant objects are generated from plant lines and united in so-called cultures, to which the cultivation conditions are linked. Materials and methods for each cultivation step are stored in a separate ACCESS database of the plant cultivation unit. For all cultures and thus every plant object, each cultivation site and the culture's arrival time at a site are logged by a barcode-scanner based system. Thus, for each plant object, all site-related parameters, e. g. automatically logged climate data, are available. These life history data and genetic information for the plant objects are linked to analytical results by the sampling module, which links sample components to plant object identifiers. This workflow uses controlled vocabulary for organs and treatments. Unique names generated by the system and barcode labels facilitate identification and management of the material. Web pages are provided as user interfaces to facilitate maintaining the system in an environment with many desktop computers and a rapidly changing user community. Web based search tools are the basis for joint use of the material by all researchers of the institute. Conclusion: The Golm Plant Database system, which is based on a relational database, collects the genetic and environmental information on plant material during its production or experimental use at the Max-Planck-Institute of Molecular Plant Physiology. It thus provides information according to the MIAME standard for the component 'Sample' in a highly standardised format. The Plant Database system thus facilitates collaborative work and allows efficient queries in data analysis for systems biology research.},
  language = {en}
}

@misc{MayChristianKempaetal.2009,
  author = {May, Patrick and Christian, Jan-Ole and Kempa, Stefan and Walther, Dirk},
  title = {{ChlamyCyc} : an integrative systems biology database and web-portal for {Chlamydomonas} reinhardtii},
  url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-44947},
  year = {2009},
  abstract = {Background: The unicellular green alga Chlamydomonas reinhardtii is an important eukaryotic model organism for the study of photosynthesis and plant growth. In the era of modern highthroughput technologies there is an imperative need to integrate large-scale data sets from highthroughput experimental techniques using computational methods and database resources to provide comprehensive information about the molecular and cellular organization of a single organism. Results: In the framework of the German Systems Biology initiative GoFORSYS, a pathway database and web-portal for Chlamydomonas (ChlamyCyc) was established, which currently features about 250 metabolic pathways with associated genes, enzymes, and compound information. ChlamyCyc was assembled using an integrative approach combining the recently published genome sequence, bioinformatics methods, and experimental data from metabolomics and proteomics experiments. We analyzed and integrated a combination of primary and secondary database resources, such as existing genome annotations from JGI, EST collections, orthology information, and MapMan classification. Conclusion: ChlamyCyc provides a curated and integrated systems biology repository that will enable and assist in systematic studies of fundamental cellular processes in Chlamydomonas. The ChlamyCyc database and web-portal is freely available under http://chlamycyc.mpimp-golm.mpg.de.},
  language = {en}
}

% The accent in the first author's surname is written with the ASCII \'
% control sequence; the original had a non-ASCII acute character after the
% backslash, which is not a valid LaTeX accent command.
@article{RianoPachonKleessenNeigenfindetal.2010,
  author = {Ria{\~n}o-Pach{\'o}n, Diego Mauricio and Kleessen, Sabrina and Neigenfind, Jost and Durek, Pawel and Weber, Elke and Engelsberger, Wolfgang R. and Walther, Dirk and Selbig, Joachim and Schulze, Waltraud X. and Kersten, Birgit},
  title = {Proteome-wide survey of phosphorylation patterns affected by nuclear {DNA} polymorphisms in {Arabidopsis} thaliana},
  series = {BMC Genomics},
  volume = {11},
  journal = {BMC Genomics},
  publisher = {Biomed Central},
  address = {London},
  issn = {1471-2164},
  doi = {10.1186/1471-2164-11-411},
  pages = {19},
  year = {2010},
  abstract = {Background: Protein phosphorylation is an important post-translational modification influencing many aspects of dynamic cellular behavior. Site-specific phosphorylation of amino acid residues serine, threonine, and tyrosine can have profound effects on protein structure, activity, stability, and interaction with other biomolecules. Phosphorylation sites can be affected in diverse ways in members of any species, one such way is through single nucleotide polymorphisms (SNPs). The availability of large numbers of experimentally identified phosphorylation sites, and of natural variation datasets in Arabidopsis thaliana prompted us to analyze the effect of non-synonymous SNPs (nsSNPs) onto phosphorylation sites. Results: From the analyses of 7,178 experimentally identified phosphorylation sites we found that: (i) Proteins with multiple phosphorylation sites occur more often than expected by chance. (ii) Phosphorylation hotspots show a preference to be located outside conserved domains. (iii) nsSNPs affected experimental phosphorylation sites as much as the corresponding non-phosphorylated amino acid residues. (iv) Losses of experimental phosphorylation sites by nsSNPs were identified in 86 A. thaliana proteins, among them receptor proteins were overrepresented. These results were confirmed by similar analyses of predicted phosphorylation sites in A. thaliana. In addition, predicted threonine phosphorylation sites showed a significant enrichment of nsSNPs towards asparagines and a significant depletion of the synonymous substitution. Proteins in which predicted phosphorylation sites were affected by nsSNPs (loss and gain), were determined to be mainly receptor proteins, stress response proteins and proteins involved in nucleotide and protein binding. Proteins involved in metabolism, catalytic activity and biosynthesis were less affected. Conclusions: We analyzed more than 7,100 experimentally identified phosphorylation sites in almost 4,300 protein-coding loci in silico, thus constituting the largest phosphoproteomics dataset for A. thaliana available to date. Our findings suggest a relatively high variability in the presence or absence of phosphorylation sites between different natural accessions in receptor and other proteins involved in signal transduction. Elucidating the effect of phosphorylation sites affected by nsSNPs on adaptive responses represents an exciting research goal for the future.},
  language = {en}
}