% PhD thesis record. `pages` presumably holds the total page count of the
% thesis rather than a page range -- TODO confirm against the repository record.
@phdthesis{Schwahn2018,
  author    = {Schwahn, Kevin},
  title     = {Data driven approaches to infer the regulatory mechanism shaping and constraining levels of metabolites in metabolic networks},
  school    = {Universit{\"a}t Potsdam},
  year      = {2018},
  pages     = {109},
  doi       = {10.25932/publishup-42324},
  url       = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-423240},
  abstract  = {Systems biology aims at investigating biological systems in its entirety by gathering and analyzing large-scale data sets about the underlying components. Computational systems biology approaches use these large-scale data sets to create models at different scales and cellular levels. In addition, it is concerned with generating and testing hypotheses about biological processes. However, such approaches are inevitably leading to computational challenges due to the high dimensionality of the data and the differences in the dimension of data from different cellular layers. This thesis focuses on the investigation and development of computational approaches to analyze metabolite profiles in the context of cellular networks. This leads to determining what aspects of the network functionality are reflected in the metabolite levels. With these methods at hand, this thesis aims to answer three questions: (1) how observability of biological systems is manifested in metabolite profiles and if it can be used for phenotypical comparisons; (2) how to identify couplings of reaction rates from metabolic profiles alone; and (3) which regulatory mechanism that affect metabolite levels can be distinguished by integrating transcriptomics and metabolomics read-outs. I showed that sensor metabolites, identified by an approach from observability theory, are more correlated to each other than non-sensors. The greater correlations between sensor metabolites were detected both with publicly available metabolite profiles and synthetic data simulated from a medium-scale kinetic model. I demonstrated through robustness analysis that correlation was due to the position of the sensor metabolites in the network and persisted irrespectively of the experimental conditions. Sensor metabolites are therefore potential candidates for phenotypical comparisons between conditions through targeted metabolic analysis.
Furthermore, I demonstrated that the coupling of metabolic reaction rates can be investigated from a purely data-driven perspective, assuming that metabolic reactions can be described by mass action kinetics. Employing metabolite profiles from domesticated and wild wheat and tomato species, I showed that the process of domestication is associated with a loss of regulatory control on the level of reaction rate coupling. I also found that the same metabolic pathways in Arabidopsis thaliana and Escherichia coli exhibit differences in the number of reaction rate couplings. I designed a novel method for the identification and categorization of transcriptional effects on metabolism by combining data on gene expression and metabolite levels. The approach determines the partial correlation of metabolites with control by the principal components of the transcript levels. The principal components contain the majority of the transcriptomic information allowing to partial out the effect of the transcriptional layer from the metabolite profiles. Depending whether the correlation between metabolites persists upon controlling for the effect of the transcriptional layer, the approach allows us to group metabolite pairs into being associated due to post-transcriptional or transcriptional regulation, respectively. I showed that the classification of metabolite pairs into those that are associated due to transcriptional or post-transcriptional regulation are in agreement with existing literature and findings from a Bayesian inference approach. The approaches developed, implemented, and investigated in this thesis open novel ways to jointly study metabolomics and transcriptomics data as well as to place metabolic profiles in the network context. The results from these approaches have the potential to provide further insights into the regulatory machinery in a biological system.},
  language  = {en},
}
% Journal article. `pages` carries the Frontiers article number (it matches the
% DOI suffix); `pagetotal` is presumably the page count -- verify against the
% publisher record. No `series` field: it only repeated the journal name.
@article{SchwahnBeleggiaOmranianetal.2017,
  author    = {Schwahn, Kevin and Beleggia, Romina and Omranian, Nooshin and Nikoloski, Zoran},
  title     = {Stoichiometric Correlation Analysis: Principles of Metabolic Functionality from Metabolomics Data},
  journal   = {Frontiers in Plant Science},
  volume    = {8},
  pages     = {2152},
  pagetotal = {12},
  year      = {2017},
  publisher = {Frontiers Research Foundation},
  address   = {Lausanne},
  issn      = {1664-462X},
  doi       = {10.3389/fpls.2017.02152},
  abstract  = {Recent advances in metabolomics technologies have resulted in high-quality (time-resolved) metabolic profiles with an increasing coverage of metabolic pathways. These data profiles represent read-outs from often non-linear dynamics of metabolic networks. Yet, metabolic profiles have largely been explored with regression-based approaches that only capture linear relationships, rendering it difficult to determine the extent to which the data reflect the underlying reaction rates and their couplings. Here we propose an approach termed Stoichiometric Correlation Analysis (SCA) based on correlation between positive linear combinations of log-transformed metabolic profiles. The log-transformation is due to the evidence that metabolic networks can be modeled by mass action law and kinetics derived from it. Unlike the existing approaches which establish a relation between pairs of metabolites, SCA facilitates the discovery of higher-order dependence between more than two metabolites. By using a paradigmatic model of the tricarboxylic acid cycle we show that the higher-order dependence reflects the coupling of concentration of reactant complexes, capturing the subtle difference between the employed enzyme kinetics. Using time-resolved metabolic profiles from Arabidopsis thaliana and Escherichia coli, we show that SCA can be used to quantify the difference in coupling of reactant complexes, and hence, reaction rates, underlying the stringent response in these model organisms. By using SCA with data from natural variation of wild and domesticated wheat and tomato accession, we demonstrate that the domestication is accompanied by loss of such couplings, in these species. Therefore, application of SCA to metabolomics data from natural variation in wild and domesticated populations provides a mechanistic way to understanding domestication and its relation to metabolic networks.},
  language  = {en},
}
% Journal article. The page range uses a plain en-dash (`--`) without
% surrounding spaces. No `series` field: it only repeated the journal name.
@article{SchwahndeSouzaFernieetal.2014,
  author    = {Schwahn, Kevin and de Souza, Leonardo Perez and Fernie, Alisdair R. and Tohge, Takayuki},
  title     = {Metabolomics-assisted refinement of the pathways of steroidal glycoalkaloid biosynthesis in the tomato clade},
  journal   = {Journal of Integrative Plant Biology},
  volume    = {56},
  number    = {9},
  pages     = {864--875},
  year      = {2014},
  publisher = {Wiley-Blackwell},
  address   = {Hoboken},
  issn      = {1672-9072},
  doi       = {10.1111/jipb.12274},
  abstract  = {Steroidal glycoalkaloids (SGAs) are nitrogen-containing secondary metabolites of the Solanum species, which are known to have large chemical and bioactive diversity in nature. While recent effort and development on LC/MS techniques for SGA profiling have elucidated the main pathways of SGA metabolism in tomato, the problem of peak annotation still remains due to the vast diversity of chemical structure and similar on overlapping of chemical formula. Here we provide a case study of peak classification and annotation approach by integration of species and tissue specificities of SGA accumulation for provision of comprehensive pathways of SGA biosynthesis. In order to elucidate natural diversity of SGA biosynthesis, a total of 169 putative SGAs found in eight tomato accessions (Solanum lycopersicum, S. pimpinellifolium, S. cheesmaniae, S. chmielewskii, S. neorickii, S. peruvianum, S. habrochaites, S. pennellii) and four tissue types were used for correlation analysis. The results obtained in this study contribute annotation and classification of SGAs as well as detecting putative novel biosynthetic branch points. As such this represents a novel strategy for peak annotation for plant secondary metabolites.},
  language  = {en},
}
% Journal article. `pages` carries the Frontiers article number (it matches the
% DOI suffix); `pagetotal` is presumably the page count -- verify against the
% publisher record. No `series` field: it only repeated the journal name.
@article{SchwahnNikoloski2018,
  author    = {Schwahn, Kevin and Nikoloski, Zoran},
  title     = {Data reduction approaches for dissecting transcriptional effects on metabolism},
  journal   = {Frontiers in Plant Science},
  volume    = {9},
  pages     = {538},
  pagetotal = {12},
  year      = {2018},
  publisher = {Frontiers Research Foundation},
  address   = {Lausanne},
  issn      = {1664-462X},
  doi       = {10.3389/fpls.2018.00538},
  abstract  = {The availability of high-throughput data from transcriptomics and metabolomics technologies provides the opportunity to characterize the transcriptional effects on metabolism. Here we propose and evaluate two computational approaches rooted in data reduction techniques to identify and categorize transcriptional effects on metabolism by combining data on gene expression and metabolite levels. The approaches determine the partial correlation between two metabolite data profiles upon control of given principal components extracted from transcriptomics data profiles. Therefore, they allow us to investigate both data types with all features simultaneously without doing preselection of genes. The proposed approaches allow us to categorize the relation between pairs of metabolites as being under transcriptional or post-transcriptional regulation. The resulting classification is compared to existing literature and accumulated evidence about regulatory mechanism of reactions and pathways in the cases of Escherichia coli, Saccharomyces cerevisiae, and Arabidopsis thaliana.},
  language  = {en},
}