% Ryngajllo et al. 2011: SLocX subcellular localization predictor.
% The former series field only repeated the journal name and was dropped.
% NOTE(review): pages = 19 looks like a page count rather than a page range
% or article number (the DOI ends in 00043) -- confirm against the publisher.
@article{RyngajlloChildsLohseetal.2011,
  author    = {Ryngajllo, Malgorzata and Childs, Liam H. and Lohse, Marc and Giorgi, Federico M. and Lude, Anja and Selbig, Joachim and Usadel, Bj{\"o}rn},
  title     = {{SLocX}: predicting subcellular localization of {Arabidopsis} proteins leveraging gene expression data},
  journal   = {Frontiers in Plant Science},
  volume    = {2},
  pages     = {19},
  year      = {2011},
  publisher = {Frontiers Research Foundation},
  address   = {Lausanne},
  issn      = {1664-462X},
  doi       = {10.3389/fpls.2011.00043},
  abstract  = {Despite the growing volume of experimentally validated knowledge about the subcellular localization of plant proteins, a well performing in silico prediction tool is still a necessity. Existing tools, which employ information derived from protein sequence alone, offer limited accuracy and/or rely on full sequence availability. We explored whether gene expression profiling data can be harnessed to enhance prediction performance. To achieve this, we trained several support vector machines to predict the subcellular localization of Arabidopsis thaliana proteins using sequence derived information, expression behavior, or a combination of these data and compared their predictive performance through a cross-validation test. We show that gene expression carries information about the subcellular localization not available in sequence information, yielding dramatic benefits for plastid localization prediction, and some notable improvements for other compartments such as the mitochondrion, the Golgi, and the plasma membrane. Based on these results, we constructed a novel subcellular localization prediction engine, SLocX, combining gene expression profiling data with protein sequence-based information. We then validated the results of this engine using an independent test set of annotated proteins and a transient expression of GFP fusion proteins. Here, we present the prediction framework and a website of predicted localizations for Arabidopsis. The relatively good accuracy of our prediction engine, even in cases where only partial protein sequence is available (e.g., in sequences lacking the N-terminal region), offers a promising opportunity for similar application to non-sequenced or poorly annotated plant species. Although the prediction scope of our method is currently limited by the availability of expression information on the ATH1 array, we believe that the advances in measuring gene expression technology will make our method applicable for all Arabidopsis proteins.},
  language  = {en},
}

% Childs et al. 2010: SFP-based selective sweep and association mapping.
% NOTE(review): journal, volume and pages were absent in the original record;
% they are inferred from the ISSN 1471-2164 and the DOI suffix 11-188 --
% confirm against the publisher record.
% NOTE(review): the author given as Korff, Maria V. may really be
% von Korff, Maria -- verify the name before changing it.
@article{ChildsWituckaWallGuentheretal.2010,
  author   = {Childs, Liam H. and Witucka-Wall, Hanna and Guenther, Torsten and Sulpice, Ronan and Korff, Maria V. and Stitt, Mark and Walther, Dirk and Schmid, Karl J. and Altmann, Thomas},
  title    = {Single feature polymorphism ({SFP})-based selective sweep identification and association mapping of growth-related metabolic traits in {Arabidopsis} thaliana},
  journal  = {BMC Genomics},
  volume   = {11},
  pages    = {188},
  year     = {2010},
  issn     = {1471-2164},
  doi      = {10.1186/1471-2164-11-188},
  abstract = {Background: Natural accessions of Arabidopsis thaliana are characterized by a high level of phenotypic variation that can be used to investigate the extent and mode of selection on the primary metabolic traits. A collection of 54 A. thaliana natural accession-derived lines were subjected to deep genotyping through Single Feature Polymorphism (SFP) detection via genomic DNA hybridization to Arabidopsis Tiling 1.0 Arrays for the detection of selective sweeps, and identification of associations between sweep regions and growth-related metabolic traits. Results: A total of 1,072,557 high-quality SFPs were detected and indications for 3,943 deletions and 1,007 duplications were obtained. A significantly lower than expected SFP frequency was observed in protein-, rRNA-, and tRNA-coding regions and in non-repetitive intergenic regions, while pseudogenes, transposons, and non-coding RNA genes are enriched with SFPs. Gene families involved in plant defence or in signalling were identified as highly polymorphic, while several other families including transcription factors are depleted of SFPs. 198 significant associations between metabolic genes and 9 metabolic and growth-related phenotypic traits were detected with annotation hinting at the nature of the relationship. Five significant selective sweep regions were also detected of which one associated significantly with a metabolic trait. Conclusions: We generated a high density polymorphism map for 54 A. thaliana accessions that highlights the variability of resistance genes across geographic ranges and used it to identify selective sweeps and associations between metabolic genes and metabolic phenotypes. Several associations show a clear biological relationship, while many remain requiring further investigation.},
  language = {en},
}