% Mutwil2011 -- PhD thesis, Universitaet Potsdam, 2011.
% The abstract is kept verbatim, including its original internal line break.
@phdthesis{Mutwil2011,
  author   = {Mutwil, Marek},
  title    = {Integrative transcriptomic approaches to analyzing plant co-expression networks},
  school   = {Universit{\"a}t Potsdam},
  year     = {2011},
  language = {en},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-50752},
  abstract = {It is well documented that transcriptionally coordinated genes tend to be functionally related, and that such relationships may be conserved across different species, and even kingdoms. (Ihmels et al., 2004). Such relationships was initially utilized to reveal functional gene modules in yeast and mammals (Ihmels et al., 2004), and to explore orthologous gene functions between different species and kingdoms (Stuart et al., 2003; Bergmann et al., 2004). Model organisms, such as Arabidopsis, are readily used in basic research due to resource availability and relative speed of data acquisition. A major goal is to transfer the acquired knowledge from these model organisms to species that are of greater importance to our society. However, due to large gene families in plants, the identification of functional equivalents of well characterized Arabidopsis genes in other plants is a non-trivial task, which often returns erroneous or inconclusive results. In this thesis, concepts of utilizing co-expression networks to help infer (i) gene function, (ii) organization of biological processes and (iii) knowledge transfer between species are introduced. An often overlooked fact by bioinformaticians is that a bioinformatic method is as useful as its accessibility. 
Therefore, majority of the work presented in this thesis was directed on developing freely available, user-friendly web-tools accessible for any biologist.},
}

% Giorgi2011 -- PhD thesis, Universitaet Potsdam, 2011.
% The abstract is kept verbatim, including its original internal line breaks.
@phdthesis{Giorgi2011,
  author   = {Giorgi, Federico Manuel},
  title    = {Expression-based reverse engineering of plant transcriptional networks},
  school   = {Universit{\"a}t Potsdam},
  year     = {2011},
  language = {en},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-56760},
  abstract = {Regulation of gene transcription plays a major role in mediating cellular responses and physiological behavior in all known organisms. The finding that similar genes are often regulated in a similar manner (co-regulated or "co-expressed") has directed several "guilt-by-association" approaches in order to reverse-engineer the cellular transcriptional networks using gene expression data as a compass. This kind of studies has been considerably assisted in the recent years by the development of high-throughput transcript measurement platforms, specifically gene microarrays and next-generation sequencing. In this thesis, I describe several approaches for improving the extraction and interpretation of the information contained in microarray based gene expression data, through four steps: (1) microarray platform design, (2) microarray data normalization, (3) gene network reverse engineering based on expression data and (4) experimental validation of expression-based guilt-by-association inferences. In the first part test case is shown aimed at the generation of a microarray for Thellungiella salsuginea, a salt and drought resistant close relative to the model plant Arabidopsis thaliana; the transcripts of this organism are generated on the combination of publicly available ESTs and newly generated ad-hoc next-generation sequencing data. 
Since the design of a microarray platform requires the availability of highly reliable and non-redundant transcript models, these issues are addressed consecutively, proposing several different technical solutions. In the second part I describe how inter-array correlation artifacts are generated by the common microarray normalization methods RMA and GCRMA, together with the technical and mathematical characteristics underlying the problem. A solution is proposed in the form of a novel normalization method, called tRMA. The third part of the thesis deals with the field of expression-based gene network reverse engineering. It is shown how different centrality measures in reverse engineered gene networks can be used to distinguish specific classes of genes, in particular essential genes in Arabidopsis thaliana, and how the use of conditional correlation can add a layer of understanding over the information flow processes underlying transcript regulation. Furthermore, several network reverse engineering approaches are compared, with a particular focus on the LASSO, a linear regression derivative rarely applied before in global gene network reconstruction, despite its theoretical advantages in robustness and interpretability over more standard methods. The performance of LASSO is assessed through several in silico analyses dealing with the reliability of the inferred gene networks. In the final part, LASSO and other reverse engineering methods are used to experimentally identify novel genes involved in two independent scenarios: the seed coat mucilage pathway in Arabidopsis thaliana and the hypoxic tuber development in Solanum tuberosum. In both cases an interesting method complementarity is shown, which strongly suggests a general use of hybrid approaches for transcript expression-based inferences. In conclusion, this work has helped to improve our understanding of gene transcription regulation through a better interpretation of high-throughput expression data. 
Part of the network reverse engineering methods described in this thesis have been included in a tool (CorTo) for gene network reverse engineering and annotated visualization from custom transcription datasets.},
}