% Bibliography export (publish.UP / OPUS, Universitaet Potsdam): gene-expression related works.
% Layout: one field per line; page ranges use "--" without surrounding spaces;
% words in titles that must keep their capitalisation are brace-protected.
% Several works appear twice: once as a university postprint (misc) and once as
% the original journal publication (article).

@phdthesis{Perscheid2023,
  author   = {Perscheid, Cindy},
  title    = {Integrative biomarker detection using prior knowledge on gene expression data sets},
  school   = {Universit{\"a}t Potsdam},
  year     = {2023},
  pages    = {ix, 197},
  doi      = {10.25932/publishup-58241},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-582418},
  language = {en},
  abstract = {Gene expression data is analyzed to identify biomarkers, e.g. relevant genes, which serve for diagnostic, predictive, or prognostic use. Traditional approaches for biomarker detection select distinctive features from the data based exclusively on the signals therein, facing multiple shortcomings in regards to overfitting, biomarker robustness, and actual biological relevance. Prior knowledge approaches are expected to address these issues by incorporating prior biological knowledge, e.g. on gene-disease associations, into the actual analysis. However, prior knowledge approaches are currently not widely applied in practice because they are often use-case specific and seldom applicable in a different scope. This leads to a lack of comparability of prior knowledge approaches, which in turn makes it currently impossible to assess their effectiveness in a broader context. Our work addresses the aforementioned issues with three contributions. Our first contribution provides formal definitions for both prior knowledge and the flexible integration thereof into the feature selection process. Central to these concepts is the automatic retrieval of prior knowledge from online knowledge bases, which allows for streamlining the retrieval process and agreeing on a uniform definition for prior knowledge. We subsequently describe novel and generalized prior knowledge approaches that are flexible regarding the used prior knowledge and applicable to varying use case domains. Our second contribution is the benchmarking platform Comprior. Comprior applies the aforementioned concepts in practice and allows for flexibly setting up comprehensive benchmarking studies for examining the performance of existing and novel prior knowledge approaches. It streamlines the retrieval of prior knowledge and allows for combining it with prior knowledge approaches. Comprior demonstrates the practical applicability of our concepts and further fosters the overall development and comparability of prior knowledge approaches. Our third contribution is a comprehensive case study on the effectiveness of prior knowledge approaches. For that, we used Comprior and tested a broad range of both traditional and prior knowledge approaches in combination with multiple knowledge bases on data sets from multiple disease domains. Ultimately, our case study constitutes a thorough assessment of a) the suitability of selected knowledge bases for integration, b) the impact of prior knowledge being applied at different integration levels, and c) the improvements in terms of classification performance, biological relevance, and overall robustness. In summary, our contributions demonstrate that generalized concepts for prior knowledge and a streamlined retrieval process improve the applicability of prior knowledge approaches. Results from our case study show that the integration of prior knowledge positively affects biomarker results, particularly regarding their robustness. Our findings provide the first in-depth insights on the effectiveness of prior knowledge approaches and build a valuable foundation for future research.},
}

@misc{HuynenSuzukiOguraetal.2014,
  author   = {Huynen, Leon and Suzuki, Takayuki and Ogura, Toshihiko and Watanabe, Yusuke and Millar, Craig D. and Hofreiter, Michael and Smith, Craig and Mirmoeini, Sara and Lambert, David M.},
  title    = {Reconstruction and in vivo analysis of the extinct tbx5 gene from ancient wingless moa ({Aves}: {Dinornithiformes})},
  series   = {Postprints der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe},
  journal  = {Postprints der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe},
  number   = {1117},
  year     = {2014},
  pages    = {10},
  issn     = {1866-8372},
  doi      = {10.25932/publishup-43159},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-431599},
  language = {en},
  abstract = {Background The forelimb-specific gene tbx5 is highly conserved and essential for the development of forelimbs in zebrafish, mice, and humans. Amongst birds, a single order, Dinornithiformes, comprising the extinct wingless moa of New Zealand, are unique in having no skeletal evidence of forelimb-like structures. Results To determine the sequence of tbx5 in moa, we used a range of PCR-based techniques on ancient DNA to retrieve all nine tbx5 exons and splice sites from the giant moa, Dinornis. Moa Tbx5 is identical to chicken Tbx5 in being able to activate the downstream promotors of fgf10 and ANF. In addition we show that missexpression of moa tbx5 in the hindlimb of chicken embryos results in the formation of forelimb features, suggesting that Tbx5 was fully functional in wingless moa. An alternatively spliced exon 1 for tbx5 that is expressed specifically in the forelimb region was shown to be almost identical between moa and ostrich, suggesting that, as well as being fully functional, tbx5 is likely to have been expressed normally in moa since divergence from their flighted ancestors, approximately 60 mya. Conclusions The results suggests that, as in mice, moa tbx5 is necessary for the induction of forelimbs, but is not sufficient for their outgrowth. Moa Tbx5 may have played an important role in the development of moa's remnant forelimb girdle, and may be required for the formation of this structure. Our results further show that genetic changes affecting genes other than tbx5 must be responsible for the complete loss of forelimbs in moa.},
}

% Postprint (university repository) version of Srivastava et al. 2020.
@misc{SrivastavaMurugaiyanGarciaetal.2020,
  author   = {Srivastava, Abhishek and Murugaiyan, Jayaseelan and Garcia, Juan A. L. and De Corte, Daniele and Hoetzinger, Matthias and Eravci, Murat and Weise, Christoph and Kumar, Yadhu and Roesler, Uwe and Hahn, Martin W. and Grossart, Hans-Peter},
  title    = {Combined Methylome, Transcriptome and Proteome Analyses Document Rapid Acclimatization of a Bacterium to Environmental Changes},
  series   = {Postprints der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe},
  journal  = {Postprints der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe},
  number   = {1011},
  year     = {2020},
  pages    = {23},
  issn     = {1866-8372},
  doi      = {10.25932/publishup-48199},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-481993},
  language = {en},
  abstract = {Polynucleobacter asymbioticus strain QLW-P1DMWA-1T represents a group of highly successful heterotrophic ultramicrobacteria that is frequently very abundant (up to 70\% of total bacterioplankton) in freshwater habitats across all seven continents. This strain was originally isolated from a shallow Alpine pond characterized by rapid changes in water temperature and elevated UV radiation due to its location at an altitude of 1300 m. To elucidate the strain's adjustment to fluctuating environmental conditions, we recorded changes occurring in its transcriptomic and proteomic profiles under contrasting experimental conditions by simulating thermal conditions in winter and summer as well as high UV irradiation. To analyze the potential connection between gene expression and regulation via methyl group modification of the genome, we also analyzed its methylome. The methylation pattern differed between the three treatments, pointing to its potential role in differential gene expression. An adaptive process due to evolutionary pressure in the genus was deduced by calculating the ratios of non-synonymous to synonymous substitution rates for 20 Polynucleobacter spp. genomes obtained from geographically diverse isolates. The results indicate purifying selection.},
}

% Journal version of Srivastava et al. 2020. This entry originally reused the
% key of the postprint entry above (a repeated-entry error that made it
% uncitable); it now carries the suffix "a". The postprint keeps the old key.
@article{SrivastavaMurugaiyanGarciaetal.2020a,
  author    = {Srivastava, Abhishek and Murugaiyan, Jayaseelan and Garcia, Juan A. L. and De Corte, Daniele and Hoetzinger, Matthias and Eravci, Murat and Weise, Christoph and Kumar, Yadhu and Roesler, Uwe and Hahn, Martin W. and Grossart, Hans-Peter},
  title     = {Combined Methylome, Transcriptome and Proteome Analyses Document Rapid Acclimatization of a Bacterium to Environmental Changes},
  journal   = {Frontiers in Microbiology},
  volume    = {11},
  year      = {2020},
  pages     = {21},
  publisher = {Frontiers Media},
  address   = {Lausanne},
  issn      = {1664-302X},
  doi       = {10.3389/fmicb.2020.544785},
  language  = {en},
  abstract  = {Polynucleobacter asymbioticus strain QLW-P1DMWA-1T represents a group of highly successful heterotrophic ultramicrobacteria that is frequently very abundant (up to 70\% of total bacterioplankton) in freshwater habitats across all seven continents. This strain was originally isolated from a shallow Alpine pond characterized by rapid changes in water temperature and elevated UV radiation due to its location at an altitude of 1300 m. To elucidate the strain's adjustment to fluctuating environmental conditions, we recorded changes occurring in its transcriptomic and proteomic profiles under contrasting experimental conditions by simulating thermal conditions in winter and summer as well as high UV irradiation. To analyze the potential connection between gene expression and regulation via methyl group modification of the genome, we also analyzed its methylome. The methylation pattern differed between the three treatments, pointing to its potential role in differential gene expression. An adaptive process due to evolutionary pressure in the genus was deduced by calculating the ratios of non-synonymous to synonymous substitution rates for 20 Polynucleobacter spp. genomes obtained from geographically diverse isolates. The results indicate purifying selection.},
}

@article{DennisPatelOliveretal.2017,
  author    = {Dennis, Alice B. and Patel, Vilas and Oliver, Kerry M. and Vorburger, Christoph},
  title     = {Parasitoid gene expression changes after adaptation to symbiont-protected hosts},
  journal   = {Evolution},
  volume    = {71},
  year      = {2017},
  pages     = {2599--2617},
  publisher = {Wiley},
  address   = {Hoboken},
  issn      = {0014-3820},
  doi       = {10.1111/evo.13333},
  language  = {en},
  abstract  = {Reciprocal selection between aphids, their protective endosymbionts, and the parasitoid wasps that prey upon them offers an opportunity to study the basis of their coevolution. We investigated adaptation to symbiont-conferred defense by rearing the parasitoid wasp Lysiphlebus fabarum on aphids (Aphis fabae) possessing different defensive symbiont strains (Hamiltonella defensa). After ten generations of experimental evolution, wasps showed increased abilities to parasitize aphids possessing the H. defensa strain they evolved with, but not aphids possessing the other strain. We show that the two symbiont strains encode different toxins, potentially creating different targets for counter-adaptation. Phenotypic and behavioral comparisons suggest that neither life-history traits nor oviposition behavior differed among evolved parasitoid lineages. In contrast, comparative transcriptomics of adult female wasps identified a suite of differentially expressed genes among lineages, even when reared in a common, symbiont-free, aphid host. In concurrence with the specificity of each parasitoid lineages' infectivity, most differentially expressed parasitoid transcripts were also lineage-specific. These transcripts are enriched with putative venom toxins and contain highly expressed, potentially defensive viral particles. Together, these results suggest that wild populations of L. fabarum employ a complicated offensive arsenal with sufficient genetic variation for wasps to adapt rapidly and specifically to their hosts' microbial defenses.},
}

% Postprint (university repository) version of Machens et al. 2017.
@misc{MachensBalazadehMuellerRoeberetal.2017,
  author   = {Machens, Fabian and Balazadeh, Salma and M{\"u}ller-R{\"o}ber, Bernd and Messerschmidt, Katrin},
  title    = {Synthetic Promoters and Transcription Factors for Heterologous Protein Expression in {Saccharomyces} cerevisiae},
  year     = {2017},
  pages    = {11},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-403804},
  language = {en},
  abstract = {Orthogonal systems for heterologous protein expression as well as for the engineering of synthetic gene regulatory circuits in hosts like Saccharomyces cerevisiae depend on synthetic transcription factors (synTFs) and corresponding cis-regulatory binding sites. We have constructed and characterized a set of synTFs based on either transcription activator-like effectors or CRISPR/Cas9, and corresponding small synthetic promoters (synPs) with minimal sequence identity to the host's endogenous promoters. The resulting collection of functional synTF/synP pairs confers very low background expression under uninduced conditions, while expression output upon induction of the various synTFs covers a wide range and reaches induction factors of up to 400. The broad spectrum of expression strengths that is achieved will be useful for various experimental setups, e.g., the transcriptional balancing of expression levels within heterologous pathways or the construction of artificial regulatory networks. Furthermore, our analyses reveal simple rules that enable the tuning of synTF expression output, thereby allowing easy modification of a given synTF/synP pair. This will make it easier for researchers to construct tailored transcriptional control systems.},
}

% Journal version of Machens et al. 2017. This entry originally reused the key
% of the postprint entry above (a repeated-entry error that made it uncitable);
% it now carries the suffix "a". The postprint keeps the old key.
@article{MachensBalazadehMuellerRoeberetal.2017a,
  author    = {Machens, Fabian and Balazadeh, Salma and M{\"u}ller-R{\"o}ber, Bernd and Messerschmidt, Katrin},
  title     = {Synthetic Promoters and Transcription Factors for Heterologous Protein Expression in {Saccharomyces} cerevisiae},
  journal   = {Frontiers in Bioengineering and Biotechnology},
  volume    = {5},
  year      = {2017},
  pages     = {1--11},
  publisher = {Frontiers},
  address   = {Lausanne},
  issn      = {2296-4185},
  doi       = {10.3389/fbioe.2017.00063},
  language  = {en},
  abstract  = {Orthogonal systems for heterologous protein expression as well as for the engineering of synthetic gene regulatory circuits in hosts like Saccharomyces cerevisiae depend on synthetic transcription factors (synTFs) and corresponding cis-regulatory binding sites. We have constructed and characterized a set of synTFs based on either transcription activator-like effectors or CRISPR/Cas9, and corresponding small synthetic promoters (synPs) with minimal sequence identity to the host's endogenous promoters. The resulting collection of functional synTF/synP pairs confers very low background expression under uninduced conditions, while expression output upon induction of the various synTFs covers a wide range and reaches induction factors of up to 400. The broad spectrum of expression strengths that is achieved will be useful for various experimental setups, e.g., the transcriptional balancing of expression levels within heterologous pathways or the construction of artificial regulatory networks. Furthermore, our analyses reveal simple rules that enable the tuning of synTF expression output, thereby allowing easy modification of a given synTF/synP pair. This will make it easier for researchers to construct tailored transcriptional control systems.},
}

@article{KlieNikoloskiSelbig2014,
  author    = {Klie, Sebastian and Nikoloski, Zoran and Selbig, Joachim},
  title     = {Biological cluster evaluation for gene function prediction},
  journal   = {Journal of computational biology},
  volume    = {21},
  number    = {6},
  year      = {2014},
  pages     = {428--445},
  publisher = {Liebert},
  address   = {New Rochelle},
  issn      = {1066-5277},
  doi       = {10.1089/cmb.2009.0129},
  language  = {en},
  abstract  = {Recent advances in high-throughput omics techniques render it possible to decode the function of genes by using the "guilt-by-association" principle on biologically meaningful clusters of gene expression data. However, the existing frameworks for biological evaluation of gene clusters are hindered by two bottleneck issues: (1) the choice for the number of clusters, and (2) the external measures which do not take in consideration the structure of the analyzed data and the ontology of the existing biological knowledge. Here, we address the identified bottlenecks by developing a novel framework that allows not only for biological evaluation of gene expression clusters based on existing structured knowledge, but also for prediction of putative gene functions. The proposed framework facilitates propagation of statistical significance at each of the following steps: (1) estimating the number of clusters, (2) evaluating the clusters in terms of novel external structural measures, (3) selecting an optimal clustering algorithm, and (4) predicting gene functions. The framework also includes a method for evaluation of gene clusters based on the structure of the employed ontology. Moreover, our method for obtaining a probabilistic range for the number of clusters is demonstrated valid on synthetic data and available gene expression profiles from Saccharomyces cerevisiae. Finally, we propose a network-based approach for gene function prediction which relies on the clustering of optimal score and the employed ontology. Our approach effectively predicts gene function on the Saccharomyces cerevisiae data set and is also employed to obtain putative gene functions for an Arabidopsis thaliana data set.},
}

@article{OmidbakhshfardWinckArvidssonetal.2014,
  author    = {Omidbakhshfard, Mohammad Amin and Winck, Flavia Vischi and Arvidsson, Samuel Janne and Riano-Pachon, Diego M. and M{\"u}ller-R{\"o}ber, Bernd},
  title     = {A step-by-step protocol for formaldehyde-assisted isolation of regulatory elements from {Arabidopsis} thaliana},
  journal   = {Journal of integrative plant biology},
  volume    = {56},
  number    = {6},
  year      = {2014},
  pages     = {527--538},
  publisher = {Wiley-Blackwell},
  address   = {Hoboken},
  issn      = {1672-9072},
  doi       = {10.1111/jipb.12151},
  language  = {en},
  abstract  = {The control of gene expression by transcriptional regulators and other types of functionally relevant DNA transactions such as chromatin remodeling and replication underlie a vast spectrum of biological processes in all organisms. DNA transactions require the controlled interaction of proteins with DNA sequence motifs which are often located in nucleosome-depleted regions (NDRs) of the chromatin. Formaldehyde-assisted isolation of regulatory elements (FAIRE) has been established as an easy-to-implement method for the isolation of NDRs from a number of eukaryotic organisms, and it has been successfully employed for the discovery of new regulatory segments in genomic DNA from, for example, yeast, Drosophila, and humans. Until today, however, FAIRE has only rarely been employed in plant research and currently no detailed FAIRE protocol for plants has been published. Here, we provide a step-by-step FAIRE protocol for NDR discovery in Arabidopsis thaliana. We demonstrate that NDRs isolated from plant chromatin are readily amenable to quantitative polymerase chain reaction and next-generation sequencing. Only minor modification of the FAIRE protocol will be needed to adapt it to other plants, thus facilitating the global inventory of regulatory regions across species.},
}

@article{BalazadehSchildhauerAraujoetal.2014,
  author    = {Balazadeh, Salma and Schildhauer, Joerg and Araujo, Wagner L. and Munne-Bosch, Sergi and Fernie, Alisdair R. and Proost, Sebastian and Humbeck, Klaus and M{\"u}ller-R{\"o}ber, Bernd},
  title     = {Reversal of senescence by {N} resupply to {N-starved} {Arabidopsis} thaliana: transcriptomic and metabolomic consequences},
  journal   = {Journal of experimental botany},
  volume    = {65},
  number    = {14},
  year      = {2014},
  pages     = {3975--3992},
  publisher = {Oxford Univ. Press},
  address   = {Oxford},
  issn      = {0022-0957},
  doi       = {10.1093/jxb/eru119},
  language  = {en},
  abstract  = {Leaf senescence is a developmentally controlled process, which is additionally modulated by a number of adverse environmental conditions. Nitrogen shortage is a well-known trigger of precocious senescence in many plant species including crops, generally limiting biomass and seed yield. However, leaf senescence induced by nitrogen starvation may be reversed when nitrogen is resupplied at the onset of senescence. Here, the transcriptomic, hormonal, and global metabolic rearrangements occurring during nitrogen resupply-induced reversal of senescence in Arabidopsis thaliana were analysed. The changes induced by senescence were essentially in keeping with those previously described; however, these could, by and large, be reversed. The data thus indicate that plants undergoing senescence retain the capacity to sense and respond to the availability of nitrogen nutrition. The combined data are discussed in the context of the reversibility of the senescence programme and the evolutionary benefit afforded thereby. Future prospects for understanding and manipulating this process in both Arabidopsis and crop plants are postulated.},
}