% PhD thesis (Potsdam, 2004) on CSB.DB, a systems-biology database.
% The database name is brace-protected in the title so that sentence-casing
% styles keep its capitalisation.
@phdthesis{Steinhauser2004,
  author   = {Steinhauser, Dirk},
  title    = {Inferring hypotheses from complex profile data - by means of {CSB.DB}, a comprehensive systems-biology database},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-2467},
  school   = {Universit{\"a}t Potsdam},
  year     = {2004},
  abstract = {The past decades are characterized by various efforts to provide complete sequence information of genomes regarding various organisms. The availability of full genome data triggered the development of multiplex high-throughput assays allowing simultaneous measurement of transcripts, proteins and metabolites. With genome information and profiling technologies now in hand a highly parallel experimental biology is offering opportunities to explore and discover novel principles governing biological systems. Understanding biological complexity through modelling cellular systems represents the driving force which today allows shifting from a component-centric focus to integrative and systems level investigations. The emerging field of systems biology integrates discovery and hypothesis-driven science to provide comprehensive knowledge via computational models of biological systems. Within the context of evolving systems biology, investigations were made in large-scale computational analyses on transcript co-response data through selected prokaryotic and plant model organisms. CSB.DB - a comprehensive systems-biology database - (http://csbdb.mpimp-golm.mpg.de/) was initiated to provide public and open access to the results of biostatistical analyses in conjunction with additional biological knowledge. The database tool CSB.DB enables potential users to infer hypothesis about functional interrelation of genes of interest and may serve as future basis for more sophisticated means of elucidating gene function.
The co-response concept and the CSB.DB database tool were successfully applied to predict operons in Escherichia coli by using the chromosomal distance and transcriptional co-responses. Moreover, examples were shown which indicate that transcriptional co-response analysis allows identification of differential promoter activities under different experimental conditions. The co-response concept was successfully transferred to complex organisms with the focus on the eukaryotic plant model organism Arabidopsis thaliana. The investigations made enabled the discovery of novel genes regarding particular physiological processes and beyond, allowed annotation of gene functions which cannot be accessed by sequence homology. GMD - the Golm Metabolome Database - was initiated and implemented in CSB.DB to integrated metabolite information and metabolite profiles. This novel module will allow addressing complex biological questions towards transcriptional interrelation and extent the recent systems level quest towards phenotyping.},
  subject  = {Datenbank},
  language = {en}
}

% Master's thesis (Potsdam, 2016). Typed as @mastersthesis so that standard
% styles print the `school` field; the `type` field supplies the printed label.
% NOTE(review): `language` is recorded as `de` although the title and abstract
% are in English -- confirm against the repository record before changing.
@mastersthesis{Matzk2016,
  type     = {Master Thesis},
  author   = {Matzk, S{\"o}ren},
  title    = {Predictive analysis of metabolic and preventive patient data},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-406103},
  school   = {Universit{\"a}t Potsdam},
  pages    = {XI, 63},
  year     = {2016},
  abstract = {Every day huge amounts of medical records are stored by means of hospitals' and medical offices' software. These data are generally unconsidered in research. In this work anonymized everyday medical records ascertained in a physician's office, covering holistic internal medicine in combination with orthomolecular medicine, are analyzed. Due to the lack of cooperation by the provider of the medical practice software a selection of diagnoses and anthropometric parameters was extracted manually. Information about patients' treatment are not available in this study.
Nevertheless, data mining approaches including machine learning techniques are used to enable research, prevention and monitoring of patients' course of treatment. The potential of these everyday medical data is demonstrated by investigating co-morbidity and pyroluria which is a metabolic dysfunction indicated by increased levels of hydroxyhemopyrrolin-2-one (HPL). It points out that the metabolic syndrome forms a cluster of its components and cancer, as well as mental disorders are grouped with thyroid diseases including autoimmune thyroid diseases. In contrast to prevailing assumptions in which it was estimated that approximately 10 \% of the population show increased levels of HPL, in this analysis 84.9 \% of the tested patients have an increased concentration of HPL. Prevention is illustrated by using decision tree models to predict diseases. Evaluation of the obtained model for Hashimoto's disease yield an accuracy of 87.5 \%. The model generated for hypothyroidism (accuracy of 60.9 \%) reveals shortcomings due to missing information about the treatment. Dynamics in the biomolecular status of 20 patients who have visited the medical office at least one time a year between 2010 and 2014 for laboratory tests are visualized by STATIS, a consensus analysis based on an extension to principal component analysis. Thereby, one can obtain patterns which are predestinated for specific diseases as hypertension.
This study demonstrates that these often overlooked everyday data are challenging due to its sparsity and heterogeneity but its analysis is a great possibility to do research on disease profiles of real patients.},
  language = {de}
}

% Journal article in Taxon 65 (2016) on modernising the Index Seminum.
% NOTE(review): `series` repeats the journal name; classic styles ignore it for
% @article, but biblatex may print it next to the journal title -- confirm
% whether it can be dropped.
@article{HavingaKoolAchilleetal.2016,
  author    = {Havinga, Reinout and Kool, Anneleen and Achille, Frederic and Bavcon, Joze and Berg, Christian and Bonomi, Costantino and Burkart, Michael and De Meyere, Dirk and Havstrom, Mats and Kessler, Paul and Knickmann, Barbara and Koester, Nils and Martinez, Remy and Ostgaard, Havard and Ravnjak, Blanka and Scheen, Anne-Cathrine and Smith, Pamela and Smith, Paul and Socher, Stephanie A. and Vange, Vibekke},
  title     = {The {Index Seminum}: Seeds of change for seed exchange},
  series    = {Taxon},
  volume    = {65},
  journal   = {Taxon},
  publisher = {International Association for Plant Taxonomy},
  address   = {Bratislava},
  issn      = {0040-0262},
  doi       = {10.12705/652.9},
  pages     = {333--336},
  year      = {2016},
  abstract  = {Botanic gardens have been exchanging seeds through seed catalogues for centuries. In many gardens, these catalogues remain an important source of plant material. Living collections have become more relevant for genetic analysis and derived research, since genomics of non-model organisms heavily rely on living material. The range of species that is made available annually on all seed lists combined, provides an unsurpassed source of instantly accessible plant material for research collections. Still, the Index Seminum has received criticism in the past few decades. The current exchange model dictates that associated data is manually entered into each database. The amount of time involved and the human errors occurring in this process are difficult to justify when the data was initially produced as a report from another database. The authors propose that an online marketplace for seed exchange should be established, with enhanced search possibilities and downloadable accession data in a standardised format.
Such online service should preferably be supervised and coordinated by Botanic Gardens Conservation International (BGCI). This manuscript is the outcome of a workshop on July 9th, 2015, at the European botanic gardens congress "Eurogard VII" in Paris, where the first two authors invited members of the botanic garden community to discuss how the anachronistic Index Seminum can be transformed into an improved and modern tool for seed exchange.},
  language  = {en}
}

% PhD thesis (Potsdam, 2008) on identifying transcription factor genes in plants.
% Accents in the author name are written as braced LaTeX accent commands
% ({\~n}, {\'o}) so that BibTeX treats each as a single letter when sorting.
@phdthesis{RianoPachon2008,
  author   = {Ria{\~n}o-Pach{\'o}n, Diego Mauricio},
  title    = {Identification of transcription factor genes in plants},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-27009},
  school   = {Universit{\"a}t Potsdam},
  year     = {2008},
  abstract = {In order to function properly, organisms have a complex control mechanism, in which a given gene is expressed at a particular time and place. One way to achieve this control is to regulate the initiation of transcription. This step requires the assembly of several components, i.e., a basal/general machinery common to all expressed genes, and a specific/regulatory machinery, which differs among genes and is the responsible for proper gene expression in response to environmental or developmental signals. This specific machinery is composed of transcription factors (TFs), which can be grouped into evolutionarily related gene families that possess characteristic protein domains. In this work we have exploited the presence of protein domains to create rules that serve for the identification and classification of TFs. We have modelled such rules as a bipartite graph, where families and protein domains are represented as nodes. Connections between nodes represent that a protein domain should (required rule) or should not (forbidden rule) be present in a protein to be assigned into a TF family.
Following this approach we have identified putative complete sets of TFs in plant species, whose genome is completely sequenced: Cyanidioschyzon merolae (red algae), Chlamydomonas reinhardtii (green alga), Ostreococcus tauri (green alga), Physcomitrella patens (moss), Arabidopsis thaliana (thale cress), Populus trichocarpa (black cottonwood) and Oryza sativa (rice). The identification of the complete sets of TFs in the above-mentioned species, as well as additional information and reference literature are available at http://plntfdb.bio.uni-potsdam.de/. The availability of such sets allowed us performing detailed evolutionary studies at different levels, from a single family to all TF families in different organisms in a comparative genomics context. Notably, we uncovered preferential expansions in different lineages, paving the way to discover the specific biological roles of these proteins under different conditions. For the basic leucine zipper (bZIP) family of TFs we were able to infer that in the most recent common ancestor (MRCA) of all green plants there were at least four bZIP genes functionally involved in oxidative stress and unfolded protein responses that are bZIP-mediated processes in all eukaryotes, but also in light-dependent regulations. The four founder genes amplified and diverged significantly, generating traits that benefited the colonization of new environments. Currently, following the approach described above, up to 57 TF and 11 TR families can be identified, which are among the most numerous transcription regulatory families in plants. Three families of putative TFs predate the split between rhodophyta (red algae) and chlorophyta (green algae), i.e., G2-like, PLATZ, and RWPRK, and may have been of particular importance for the evolution of eukaryotic photosynthetic organisms.
Nine additional families, i.e., ABI3/VP1, AP2-EREBP, ARR-B, C2C2-CO-like, C2C2-Dof, PBF-2-like/Whirly, Pseudo ARR-B, SBP, and WRKY, predate the split between green algae and streptophytes. The identification of putative complete list of TFs has also allowed the delineation of lineage-specific regulatory families. The families SBP, bHLH, SNF2, MADS, WRKY, HMG, AP2-EREBP and FHA significantly differ in size between algae and land plants. The SBP family of TFs is significantly larger in C. reinhardtii, compared to land plants, and appears to have been lost in the prasinophyte O. tauri. The families bHLH, SNF2, MADS, WRKY, HMG, AP2-EREBP and FHA preferentially expanded with the colonisation of land, and might have played an important role in this great moment in evolution. Later, after the split of bryophytes and tracheophytes, the families MADS, AP2-EREBP, NAC, AUX/IAA, PHD and HRT have significantly larger numbers in the lineage leading to seed plants. We identified 23 families that are restricted to land plants and that might have played an important role in the colonization of this new habitat. Based on the list of TFs in different species we have started to develop high-throughput experimental platforms (in rice and C. reinhardtii) to monitor gene expression changes of TF genes under different genetic, developmental or environmental conditions. In this work we present the monitoring of Arabidopsis thaliana TFs during the onset of senescence, a process that leads to cell and tissue disintegration in order to redistribute nutrients (e.g. nitrogen) from leaves to reproductive organs. We show that the expression of 185 TF genes changes when leaves develop from half to fully expanded leaves and finally enter partial senescence. 76\% of these TFs are down-regulated during senescence, the remaining are up-regulated.
The identification of TFs in plants in a comparative genomics setup has proven fruitful for the understanding of evolutionary processes and contributes to the elucidation of complex developmental programs.},
  language = {en}
}