% Bibliography entries exported from an institutional publication server.
% Citation keys are kept exactly as exported so existing \cite commands keep working.
% Layout: one field per line, braces as delimiters, page ranges with a double hyphen.

% PROMI web service paper. The journal name was derived from the ISSN (1471-2105).
% NOTE(review): volume and pages are absent from the export -- TODO add from the publisher record.
@article{HummelKeshvariWeckwerthetal.2005,
  author   = {Hummel, Jan and Keshvari, N. and Weckwerth, Wolfram and Selbig, Joachim},
  title    = {Species-specific analysis of protein sequence motifs using mutual information},
  journal  = {BMC Bioinformatics},
  issn     = {1471-2105},
  year     = {2005},
  abstract = {Background: Protein sequence motifs are by definition short fragments of conserved amino acids, often associated with a specific function. Accordingly protein sequence profiles derived from multiple sequence alignments provide an alternative description of functional motifs characterizing families of related sequences. Such profiles conveniently reflect functional necessities by pointing out proximity at conserved sequence positions as well as depicting distances at variable positions. Discovering significant conservation characteristics within the variable positions of profiles mirrors group-specific and, in particular, evolutionary features of the underlying sequences. Results: We describe the tool PROfile analysis based on Mutual Information (PROMI) that enables comparative analysis of user-classified protein sequences. PROMI is implemented as a web service using Perl and R as well as other publicly available packages and tools on the server-side. On the client-side platform-independence is achieved by generally applied internet delivery standards. As one possible application analysis of the zinc finger C2H2-type protein domain is introduced to illustrate the functionality of the tool. Conclusion: The web service PROMI should assist researchers to detect evolutionary correlations in protein profiles of defined biological sequences. It is available at http://promi.mpimp-golm.mpg.de where additional documentation can be found.},
  language = {en}
}

% Phos3D paper, repository copy identified by a URN resolver link (hence the misc type).
@misc{DurekSchudomaWeckwerthetal.2009,
  author   = {Durek, Pawel and Schudoma, Christian and Weckwerth, Wolfram and Selbig, Joachim and Walther, Dirk},
  title    = {Detection and characterization of {3D}-signature phosphorylation site motifs and their contribution towards improved phosphorylation site prediction in proteins},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-45129},
  year     = {2009},
  abstract = {Background: Phosphorylation of proteins plays a crucial role in the regulation and activation of metabolic and signaling pathways and constitutes an important target for pharmaceutical intervention. Central to the phosphorylation process is the recognition of specific target sites by protein kinases followed by the covalent attachment of phosphate groups to the amino acids serine, threonine, or tyrosine. The experimental identification as well as computational prediction of phosphorylation sites (P-sites) has proved to be a challenging problem. Computational methods have focused primarily on extracting predictive features from the local, one-dimensional sequence information surrounding phosphorylation sites. Results: We characterized the spatial context of phosphorylation sites and assessed its usability for improved phosphorylation site predictions. We identified 750 non-redundant, experimentally verified sites with three-dimensional (3D) structural information available in the protein data bank (PDB) and grouped them according to their respective kinase family. We studied the spatial distribution of amino acids around phosphorserines, phosphothreonines, and phosphotyrosines to extract signature 3D-profiles. Characteristic spatial distributions of amino acid residue types around phosphorylation sites were indeed discernable, especially when kinase-family-specific target sites were analyzed. To test the added value of using spatial information for the computational prediction of phosphorylation sites, Support Vector Machines were applied using both sequence as well as structural information. When compared to sequence-only based prediction methods, a small but consistent performance improvement was obtained when the prediction was informed by 3D-context information. Conclusion: While local one-dimensional amino acid sequence information was observed to harbor most of the discriminatory power, spatial context information was identified as relevant for the recognition of kinases and their cognate target sites and can be used for an improved prediction of phosphorylation sites. A web-based service (Phos3D) implementing the developed structure-based P-site prediction method has been made available at http://phos3d.mpimp-golm.mpg.de.},
  language = {en}
}

% MAPA potato tuber proteomics paper. The exported series field merely repeated the
% journal name and was dropped; the acronym in the title is brace-protected.
@article{HoehenwarterLarhlimiHummeletal.2011,
  author    = {H{\"o}henwarter, Wolfgang and Larhlimi, Abdelhalim and Hummel, Jan and Egelhofer, Volker and Selbig, Joachim and van Dongen, Joost T. and Wienkoop, Stefanie and Weckwerth, Wolfram},
  title     = {{MAPA} Distinguishes genotype-specific variability of highly similar regulatory protein isoforms in potato tuber},
  journal   = {Journal of Proteome Research},
  volume    = {10},
  number    = {7},
  pages     = {2979--2991},
  publisher = {American Chemical Society},
  address   = {Washington},
  issn      = {1535-3893},
  doi       = {10.1021/pr101109a},
  year      = {2011},
  abstract  = {Mass Accuracy Precursor Alignment is a fast and flexible method for comparative proteome analysis that allows the comparison of unprecedented numbers of shotgun proteomics analyses on a personal computer in a matter of hours. We compared 183 LC-MS analyses and more than 2 million MS/MS spectra and could define and separate the proteomic phenotypes of field grown tubers of 12 tetraploid cultivars of the crop plant Solanum tuberosum. Protein isoforms of patatin as well as other major gene families such as lipoxygenase and cysteine protease inhibitor that regulate tuber development were found to be the primary source of variability between the cultivars. This suggests that differentially expressed protein isoforms modulate genotype specific tuber development and the plant phenotype. We properly assigned the measured abundance of tryptic peptides to different protein isoforms that share extensive stretches of primary structure and thus inferred their abundance. Peptides unique to different protein isoforms were used to classify the remaining peptides assigned to the entire subset of isoforms based on a common abundance profile using multivariate statistical procedures. We identified nearly 4000 proteins which we used for quantitative functional annotation making this the most extensive study of the tuber proteome to date.},
  language  = {en}
}