% Bibliography database: ten entries (nine journal articles, one postprint).
% Layout: one entry per block, one field per line. Citation keys are unchanged.
% Comment lines sit outside the entries and deliberately avoid the at-sign,
% which would otherwise be read as the start of an entry.

% NOTE(review): the journal field was missing (required for article entries).
% "Bioinformatics" was derived from ISSN 1367-4803 -- please confirm.
% Volume, number and pages are still missing.
@article{ScholzKaplanGuyetal.2005,
  author    = {Scholz, Matthias and Kaplan, F. and Guy, C. L. and Kopka, Joachim and Selbig, Joachim},
  title     = {Non-linear {PCA}: a missing data approach},
  journal   = {Bioinformatics},
  issn      = {1367-4803},
  year      = {2005},
  abstract  = {Motivation: Visualizing and analysing the potential non-linear structure of a dataset is becoming an important task in molecular biology. This is even more challenging when the data have missing values. Results: Here, we propose an inverse model that performs non-linear principal component analysis (NLPCA) from incomplete datasets. Missing values are ignored while optimizing the model, but can be estimated afterwards. Results are shown for both artificial and experimental datasets. In contrast to linear methods, non-linear methods were able to give better missing value estimations for non-linear structured data. Application: We applied this technique to a time course of metabolite data from a cold stress experiment on the model plant Arabidopsis thaliana, and could approximate the mapping function from any time point to the metabolite responses. Thus, the inverse NLPCA provides greatly improved information for better understanding the complex response to cold stress.},
  language  = {en},
}

% The journal name was missing here; it is taken from the Sprenger 2017 entry
% in this file, which carries the same ISSN 1467-7644.
% The name particle "van" is placed before the surname so that it is parsed as
% the von part of the name instead of as part of the given name.
% Volume, number and pages are still missing.
@article{SteinfathStrehmelPetersetal.2010,
  author    = {Steinfath, Matthias and Strehmel, Nadine and Peters, Rolf and Schauer, Nicolas and Groth, Detlef and Hummel, Jan and Steup, Martin and Selbig, Joachim and Kopka, Joachim and Geigenberger, Peter and van Dongen, Joost T.},
  title     = {Discovering plant metabolic biomarkers for phenotype prediction using an untargeted approach},
  journal   = {Plant Biotechnology Journal},
  issn      = {1467-7644},
  doi       = {10.1111/j.1467-7652.2010.00516.x},
  year      = {2010},
  abstract  = {Biomarkers are used to predict phenotypical properties before these features become apparent and, therefore, are valuable tools for both fundamental and applied research. Diagnostic biomarkers have been discovered in medicine many decades ago and are now commonly applied. While this is routine in the field of medicine, it is of surprise that in agriculture this approach has never been investigated. Up to now, the prediction of phenotypes in plants was based on growing plants and assaying the organs of interest in a time intensive process. For the first time, we demonstrate in this study the application of metabolomics to predict agronomic important phenotypes of a crop plant that was grown in different environments. Our procedure consists of established techniques to screen untargeted for a large amount of metabolites in parallel, in combination with machine learning methods. By using this combination of metabolomics and biomathematical tools metabolites were identified that can be used as biomarkers to improve the prediction of traits. The predictive metabolites can be selected and used subsequently to develop fast, targeted and low-cost diagnostic biomarker assays that can be implemented in breeding programs or quality assessment analysis. The identified metabolic biomarkers allow for the prediction of crop product quality. Furthermore, marker-assisted selection can benefit from the discovery of metabolic biomarkers when other molecular markers come to its limitation. The described marker selection method was developed for potato tubers, but is generally applicable to any crop and trait as it functions independently of genomic information.},
  language  = {en},
}

% Journal version of the work that also appears below as the 2018 postprint
% (SprengerErbanSeddigetal.2018). The author spelling follows the postprint
% entry. The series field, which only repeated the journal name, was dropped.
@article{SprengerErbanSeddigetal.2017,
  author    = {Sprenger, Heike and Erban, Alexander and Seddig, Sylvia and Rudack, Katharina and Thalhammer, Anja and Le, Mai Q. and Walther, Dirk and Zuther, Ellen and K{\"o}hl, Karin I. and Kopka, Joachim and Hincha, Dirk K.},
  title     = {Metabolite and transcript markers for the prediction of potato drought tolerance},
  journal   = {Plant Biotechnology Journal},
  volume    = {16},
  number    = {4},
  publisher = {Wiley},
  address   = {Hoboken},
  issn      = {1467-7644},
  doi       = {10.1111/pbi.12840},
  pages     = {939--950},
  year      = {2017},
  abstract  = {Potato (Solanum tuberosum L.) is one of the most important food crops worldwide. Current potato varieties are highly susceptible to drought stress. In view of global climate change, selection of cultivars with improved drought tolerance and high yield potential is of paramount importance. Drought tolerance breeding of potato is currently based on direct selection according to yield and phenotypic traits and requires multiple trials under drought conditions. Marker-assisted selection (MAS) is cheaper, faster and reduces classification errors caused by noncontrolled environmental effects. We analysed 31 potato cultivars grown under optimal and reduced water supply in six independent field trials. Drought tolerance was determined as tuber starch yield. Leaf samples from young plants were screened for preselected transcript and nontargeted metabolite abundance using qRT-PCR and GC-MS profiling, respectively. Transcript marker candidates were selected from a published RNA-Seq data set. A Random Forest machine learning approach extracted metabolite and transcript markers for drought tolerance prediction with low error rates of 6\% and 9\%, respectively. Moreover, by combining transcript and metabolite markers, the prediction error was reduced to 4.3\%. Feature selection from Random Forest models allowed model minimization, yielding a minimal combination of only 20 metabolite and transcript markers that were successfully tested for their reproducibility in 16 independent agronomic field trials. We demonstrate that a minimum combination of transcript and metabolite markers sampled at early cultivation stages predicts potato yield stability under drought largely independent of seasonal and regional agronomic conditions.},
  language  = {en},
}

% NOTE(review): the journal field (required for article entries) is missing and
% this entry has no ISSN or DOI to derive it from -- please complete it.
@article{RautengartenSteinhaeuserBussisetal.2005,
  author    = {Rautengarten, Carsten and Steinhaeuser, Dirk and Bussis, D. and Stintzi, A. and Schaller, A. and Kopka, Joachim and Altmann, Thomas},
  title     = {Inferring hypotheses on functional relationships of genes: Analysis of the {Arabidopsis} thaliana subtilase gene family},
  year      = {2005},
  abstract  = {The gene family of subtilisin-like serine proteases (subtilases) in Arabidopsis thaliana comprises 56 members, divided into six distinct subfamilies. Whereas the members of five subfamilies are similar to pyrolysins, two genes share stronger similarity to animal kexins. Mutant screens confirmed 144 T-DNA insertion lines with knockouts for 55 out of the 56 subtilases. Apart from SDD1, none of the confirmed homozygous mutants revealed any obvious visible phenotypic alteration during growth under standard conditions. Apart from this specific case, forward genetics gave us no hints about the function of the individual 54 non-characterized subtilase genes. Therefore, the main objective of our work was to overcome the shortcomings of the forward genetic approach and to infer alternative experimental approaches by using an integrative bioinformatics and biological approach. Computational analyses based on transcriptional co-expression and co-response pattern revealed at least two expression networks, suggesting that functional redundancy may exist among subtilases with limited similarity. Furthermore, two hubs were identified, which may be involved in signalling or may represent higher-order regulatory factors involved in responses to environmental cues. A particular enrichment of co-regulated genes with metabolic functions was observed for four subtilases possibly representing late responsive elements of environmental stress. The kexin homologs show stronger associations with genes of transcriptional regulation context. Based on the analyses presented here and in accordance with previously characterized subtilases, we propose three main functions of subtilases: involvement in (i) control of development, (ii) protein turnover, and (iii) action as downstream components of signalling cascades.},
  language  = {en},
}

% NOTE(review): the journal field was missing (required for article entries).
% "Nucleic Acids Research" was derived from ISSN 0305-1048 -- please confirm.
% Volume, number and pages are still missing.
@article{LissoSteinhaeuserAltmannetal.2005,
  author    = {Lisso, Janina and Steinhaeuser, Dirk and Altmann, Thomas and Kopka, Joachim and M{\"u}ssig, Carsten},
  title     = {Identification of brassinosteroid-related genes by means of transcript co-response analyses},
  journal   = {Nucleic Acids Research},
  issn      = {0305-1048},
  year      = {2005},
  abstract  = {The comprehensive systems-biology database (CSB.DB) was used to reveal brassinosteroid (BR)-related genes from expression profiles based on co-response analyses. Genes exhibiting simultaneous changes in transcript levels are candidates of common transcriptional regulation. Combining numerous different experiments in data matrices allows ruling out outliers and conditional changes of transcript levels. CSB.DB was queried for transcriptional co-responses with the BR-signalling components BRI1 and BAK1: 301 out of 9694 genes represented in the nasc0271 database showed co-responses with both genes. As expected, these genes comprised pathway-involved genes (e.g. 72 BR-induced genes), because the BRI1 and BAK1 proteins are required for BR-responses. But transcript co-response takes the analysis a step further compared with direct approaches because BR-related non BR-responsive genes were identified. Insights into networks and the functional context of genes are provided, because factors determining expression patterns are reflected in correlations. Our findings demonstrate that transcript co-response analysis presents a valuable resource to uncover common regulatory patterns of genes. Different data matrices in CSB.DB allow examination of specific biological questions. All matrices are publicly available through CSB.DB. This work presents one possible roadmap to use the CSB.DB resources.},
  language  = {en},
}

% NOTE(review): the journal field was missing (required for article entries).
% "Journal of Basic Microbiology" was derived from ISSN 0233-111X -- please
% confirm. Volume, number and pages are still missing.
@article{KempaHummelSchwemmeretal.2009,
  author    = {Kempa, Stefan and Hummel, Jan and Schwemmer, Thorsten and Pietzke, Matthias and Strehmel, Nadine and Wienkoop, Stefanie and Kopka, Joachim and Weckwerth, Wolfram},
  title     = {An automated {GCxGC-TOF-MS} protocol for batch-wise extraction and alignment of mass isotopomer matrixes from differential {C-13-labelling} experiments: a case study for photoautotrophic-mixotrophic grown {Chlamydomonas} reinhardtii cells},
  journal   = {Journal of Basic Microbiology},
  issn      = {0233-111X},
  doi       = {10.1002/jobm.200800337},
  year      = {2009},
  abstract  = {Two dimensional gas chromatography coupled to time-of-flight mass spectrometry (GCxGC-TOF-MS) is a promising technique to overcome limits of complex metabolome analysis using one dimensional GC-TOF-MS. Especially at the stage of data export and data mining, however, convenient procedures to cope with the complexity of GCxGC-TOF-MS data are still in development. Here, we present a high sample throughput protocol exploiting first and second retention index for spectral library search and subsequent construction of a high dimensional data matrix useful for statistical analysis. The method was applied to the analysis of 13C-labelling experiments in the unicellular green alga Chlamydomonas reinhardtii. We developed a rapid sampling and extraction procedure for Chlamydomonas reinhardtii laboratory strain (CC503), a cell wall deficient mutant. By testing all published quenching protocols we observed dramatic metabolite leakage rates for certain metabolites. To circumvent metabolite leakage, samples were directly quenched and analyzed without separation of the medium. The growth medium was adapted to this rapid sampling protocol to avoid interference with GCxGC-TOF-MS analysis. To analyse batches of samples a new software tool, MetMax, was implemented which extracts the isotopomer matrix from stable isotope labelling experiments together with the first and second retention index (RI1 and RI2). To exploit RI1 and RI2 for metabolite identification we used the Golm metabolome database (GMD [1]) with RI1/RI2-reference spectra and new search algorithms. Using those techniques we analysed the dynamics of (CO2)-C-13 and C-13-acetate uptake in Chlamydomonas reinhardtii cells in two different steady states namely photoautotrophic and mixotrophic growth conditions.},
  language  = {en},
}

% The series field, which only repeated the journal name, was dropped. The
% last author's spelling follows the Lisso 2005 entry in this file.
% NOTE(review): pages = 14 looks like a page count rather than a page range
% or article number -- please verify against the publisher record.
@article{SchroederLissoObataetal.2014,
  author    = {Schroeder, Florian and Lisso, Janina and Obata, Toshihiro and Erban, Alexander and Maximova, Eugenia and Giavalisco, Patrick and Kopka, Joachim and Fernie, Alisdair R. and Willmitzer, Lothar and M{\"u}ssig, Carsten},
  title     = {Consequences of induced brassinosteroid deficiency in {Arabidopsis} leaves},
  journal   = {BMC plant biology},
  volume    = {14},
  publisher = {BioMed Central},
  address   = {London},
  issn      = {1471-2229},
  doi       = {10.1186/s12870-014-0309-0},
  pages     = {14},
  year      = {2014},
  abstract  = {Background: The identification of brassinosteroid (BR) deficient and BR insensitive mutants provided conclusive evidence that BR is a potent growth-promoting phytohormone. Arabidopsis mutants are characterized by a compact rosette structure, decreased plant height and reduced root system, delayed development, and reduced fertility. Cell expansion, cell division, and multiple developmental processes depend on BR. The molecular and physiological basis of BR action is diverse. The BR signalling pathway controls the activity of transcription factors, and numerous BR responsive genes have been identified. The analysis of dwarf mutants, however, may to some extent reveal phenotypic changes that are an effect of the altered morphology and physiology. This restriction holds particularly true for the analysis of established organs such as rosette leaves. Results: In this study, the mode of BR action was analysed in established leaves by means of two approaches. First, an inhibitor of BR biosynthesis (brassinazole) was applied to 21-day-old wild-type plants. Secondly, BR complementation of BR deficient plants, namely CPD (constitutive photomorphogenic dwarf)-antisense and cbb1 (cabbage1) mutant plants was stopped after 21 days. BR action in established leaves is associated with stimulated cell expansion, an increase in leaf index, starch accumulation, enhanced CO2 release by the tricarboxylic acid cycle, and increased biomass production. Cell number and protein content were barely affected. Conclusion: Previous analysis of BR promoted growth focused on genomic effects. However, the link between growth and changes in gene expression patterns barely provided clues to the physiological and metabolic basis of growth. Our study analysed comprehensive metabolic data sets of leaves with altered BR levels. The data suggest that BR promoted growth may depend on the increased provision and use of carbohydrates and energy. BR may stimulate both anabolic and catabolic pathways.},
  language  = {en},
}

% The series field, which only repeated the journal name, was dropped.
% NOTE(review): pages = 12 looks like a page count rather than a page range
% or article number -- please verify against the publisher record.
@article{HuegeGoetzeSchwarzetal.2011,
  author    = {Huege, Jan and Goetze, Jan and Schwarz, Doreen and Bauwe, Hermann and Hagemann, Martin and Kopka, Joachim},
  title     = {Modulation of the major paths of carbon in photorespiratory mutants of {Synechocystis}},
  journal   = {PLoS one},
  volume    = {6},
  number    = {1},
  publisher = {PLoS},
  address   = {San Francisco},
  issn      = {1932-6203},
  doi       = {10.1371/journal.pone.0016278},
  pages     = {12},
  year      = {2011},
  abstract  = {Background: Recent studies using transcript and metabolite profiles of wild-type and gene deletion mutants revealed that photorespiratory pathways are essential for the growth of Synechocystis sp. PCC 6803 under atmospheric conditions. Pool size changes of primary metabolites, such as glycine and glycolate, indicated a link to photorespiration. Methodology/Principal Findings: The (13)C labelling kinetics of primary metabolites were analysed in photoautotrophically grown cultures of Synechocystis sp. PCC 6803 by gas chromatography-mass spectrometry (GC-MS) to demonstrate the link with photorespiration. Cells pre-acclimated to high CO(2) (5\%, HC) or limited CO(2) (0.035\%, LC) conditions were pulse-labelled under very high (2\% w/w) (13)C-NaHCO(3) (VHC) conditions followed by treatment with ambient (12)C at HC and LC conditions, respectively. The (13)C enrichment, relative changes in pool size, and (13)C flux of selected metabolites were evaluated. We demonstrate two major paths of CO(2) assimilation via Rubisco in Synechocystis, i.e., from 3PGA via PEP to aspartate, malate and citrate or, to a lesser extent, from 3PGA via glucose-6-phosphate to sucrose. The results reveal evidence of carbon channelling from 3PGA to the PEP pool. Furthermore, (13)C labelling of glycolate was observed under conditions thought to suppress photorespiration. Using the glycolate-accumulating Delta glcD1 mutant, we demonstrate enhanced (13)C partitioning into the glycolate pool under conditions favouring photorespiration and enhanced (13)C partitioning into the glycine pool of the glycine-accumulating Delta gcvT mutant. Under LC conditions, the photorespiratory mutants Delta glcD1 and Delta gcvT showed enhanced activity of the additional carbon-fixing PEP carboxylase pathway. Conclusions/Significance: With our approach of non-steady-state (13)C labelling and analysis of metabolite pool sizes with respective (13)C enrichments, we identify the use and modulation of major pathways of carbon assimilation in Synechocystis in the presence of high and low inorganic carbon supplies.},
  language  = {en},
}

% Postprint of the article recorded above as SprengerErbanSeddigetal.2017
% (same title, authors and abstract); kept as a separate entry under its own
% key. Fields are unchanged apart from layout.
@misc{SprengerErbanSeddigetal.2018,
  author    = {Sprenger, Heike and Erban, Alexander and Seddig, Sylvia and Rudack, Katharina and Thalhammer, Anja and Le, Mai Q. and Walther, Dirk and Zuther, Ellen and K{\"o}hl, Karin I. and Kopka, Joachim and Hincha, Dirk K.},
  title     = {Metabolite and transcript markers for the prediction of potato drought tolerance},
  series    = {Postprints der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe},
  journal   = {Postprints der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe},
  number    = {673},
  issn      = {1866-8372},
  doi       = {10.25932/publishup-42463},
  url       = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-424630},
  pages     = {12},
  year      = {2018},
  abstract  = {Potato (Solanum tuberosum L.) is one of the most important food crops worldwide. Current potato varieties are highly susceptible to drought stress. In view of global climate change, selection of cultivars with improved drought tolerance and high yield potential is of paramount importance. Drought tolerance breeding of potato is currently based on direct selection according to yield and phenotypic traits and requires multiple trials under drought conditions. Marker-assisted selection (MAS) is cheaper, faster and reduces classification errors caused by noncontrolled environmental effects. We analysed 31 potato cultivars grown under optimal and reduced water supply in six independent field trials. Drought tolerance was determined as tuber starch yield. Leaf samples from young plants were screened for preselected transcript and nontargeted metabolite abundance using qRT-PCR and GC-MS profiling, respectively. Transcript marker candidates were selected from a published RNA-Seq data set. A Random Forest machine learning approach extracted metabolite and transcript markers for drought tolerance prediction with low error rates of 6\% and 9\%, respectively. Moreover, by combining transcript and metabolite markers, the prediction error was reduced to 4.3\%. Feature selection from Random Forest models allowed model minimization, yielding a minimal combination of only 20 metabolite and transcript markers that were successfully tested for their reproducibility in 16 independent agronomic field trials. We demonstrate that a minimum combination of transcript and metabolite markers sampled at early cultivation stages predicts potato yield stability under drought largely independent of seasonal and regional agronomic conditions.},
  language  = {en},
}

% The series field, which only repeated the journal name, was dropped. The
% last author's surname is spelled as in the 2018 postprint entry
% (presumably the same person -- confirm).
@article{SprengerRudackSchudomaetal.2015,
  author    = {Sprenger, Heike and Rudack, Katharina and Schudoma, Christian and Neumann, Arne and Seddig, Sylvia and Peters, Rolf and Zuther, Ellen and Kopka, Joachim and Hincha, Dirk K. and Walther, Dirk and K{\"o}hl, Karin},
  title     = {Assessment of drought tolerance and its potential yield penalty in potato},
  journal   = {Functional plant biology : an international journal of plant function},
  volume    = {42},
  number    = {7},
  publisher = {CSIRO},
  address   = {Clayton},
  issn      = {1445-4408},
  doi       = {10.1071/FP15013},
  pages     = {655--667},
  year      = {2015},
  abstract  = {Climate models predict an increased likelihood of seasonal droughts for many areas of the world. Breeding for drought tolerance could be accelerated by marker-assisted selection. As a basis for marker identification, we studied the genetic variance, predictability of field performance and potential costs of tolerance in potato (Solanum tuberosum L.). Potato produces high calories per unit of water invested, but is drought-sensitive. In 14 independent pot or field trials, 34 potato cultivars were grown under optimal and reduced water supply to determine starch yield. In an artificial dataset, we tested several stress indices for their power to distinguish tolerant and sensitive genotypes independent of their yield potential. We identified the deviation of relative starch yield from the experimental median (DRYM) as the most efficient index. DRYM corresponded qualitatively to the partial least square model-based metric of drought stress tolerance in a stress effect model. The DRYM identified significant tolerance variation in the European potato cultivar population to allow tolerance breeding and marker identification. Tolerance results from pot trials correlated with those from field trials but predicted field performance worse than field growth parameters. Drought tolerance correlated negatively with yield under optimal conditions in the field. The distribution of yield data versus DRYM indicated that tolerance can be combined with average yield potentials, thus circumventing potential yield penalties in tolerance breeding.},
  language  = {en},
}