% Bibliography: publications by S. Klie and co-authors (2011--2015).
%
% Conventions used in this file:
%   * one field per line, brace-delimited values, aligned "=" signs;
%   * field order: author, title, venue fields, pages, year, publisher data,
%     identifiers, abstract, language;
%   * page ranges are written with a double hyphen and no surrounding spaces;
%   * "pagetotal" holds a page COUNT, "pages" holds a page range or, for
%     article-number-only journals, the article number;
%   * comments live outside entries only, since BibTeX has no comment syntax
%     inside an entry.

% Journal article. The former "series" field merely repeated the journal name
% and was dropped, because biblatex would otherwise print the name twice.
@article{KlieNikoloskiSelbig2014,
  author    = {Klie, Sebastian and Nikoloski, Zoran and Selbig, Joachim},
  title     = {Biological cluster evaluation for gene function prediction},
  journal   = {Journal of computational biology},
  volume    = {21},
  number    = {6},
  pages     = {428--445},
  year      = {2014},
  publisher = {Liebert},
  address   = {New Rochelle},
  issn      = {1066-5277},
  doi       = {10.1089/cmb.2009.0129},
  abstract  = {Recent advances in high-throughput omics techniques render it possible to decode the function of genes by using the ``guilt-by-association'' principle on biologically meaningful clusters of gene expression data. However, the existing frameworks for biological evaluation of gene clusters are hindered by two bottleneck issues: (1) the choice for the number of clusters, and (2) the external measures which do not take in consideration the structure of the analyzed data and the ontology of the existing biological knowledge. Here, we address the identified bottlenecks by developing a novel framework that allows not only for biological evaluation of gene expression clusters based on existing structured knowledge, but also for prediction of putative gene functions. The proposed framework facilitates propagation of statistical significance at each of the following steps: (1) estimating the number of clusters, (2) evaluating the clusters in terms of novel external structural measures, (3) selecting an optimal clustering algorithm, and (4) predicting gene functions. The framework also includes a method for evaluation of gene clusters based on the structure of the employed ontology. Moreover, our method for obtaining a probabilistic range for the number of clusters is demonstrated valid on synthetic data and available gene expression profiles from Saccharomyces cerevisiae. Finally, we propose a network-based approach for gene function prediction which relies on the clustering of optimal score and the employed ontology. Our approach effectively predicts gene function on the Saccharomyces cerevisiae data set and is also employed to obtain putative gene functions for an Arabidopsis thaliana data set.},
  language  = {en},
}

% Journal article. Duplicate "series" field (identical to "journal") removed.
@article{NikoloskiGrimbsKlieetal.2011,
  author    = {Nikoloski, Zoran and Grimbs, Sergio and Klie, Sebastian and Selbig, Joachim},
  title     = {Complexity of automated gene annotation},
  journal   = {Biosystems : journal of biological and information processing sciences},
  volume    = {104},
  number    = {1},
  pages     = {1--8},
  year      = {2011},
  publisher = {Elsevier},
  address   = {Oxford},
  issn      = {0303-2647},
  doi       = {10.1016/j.biosystems.2010.12.003},
  abstract  = {Integration of high-throughput data with functional annotation by graph-theoretic methods has been postulated as promising way to unravel the function of unannotated genes. Here, we first review the existing graph-theoretic approaches for automated gene function annotation and classify them into two categories with respect to their relation to two instances of transductive learning on networks - with dynamic costs and with constant costs - depending on whether or not ontological relationship between functional terms is employed. The determined categories allow to characterize the computational complexity of the existing approaches and establish the relation to classical graph-theoretic problems, such as bisection and multiway cut. In addition, our results point out that the ontological form of the structured functional knowledge does not lower the complexity of the transductive learning with dynamic costs - one of the key problems in modern systems biology. The NP-hardness of automated gene annotation renders the development of heuristic or approximation algorithms a priority for additional research.},
  language  = {en},
}

% Published in a journal (journal, volume, number and pages are all present),
% so the entry type is "article" rather than "misc"; standard styles ignore the
% venue fields on a misc entry. Duplicate "series" field removed.
@article{OmranianKleessenTohgeetal.2015,
  author    = {Omranian, Nooshin and Kleessen, Sabrina and Tohge, Takayuki and Klie, Sebastian and Basler, Georg and M{\"u}ller-R{\"o}ber, Bernd and Fernie, Alisdair R. and Nikoloski, Zoran},
  title     = {Differential metabolic and coexpression networks of plant metabolism},
  journal   = {Trends in plant science},
  volume    = {20},
  number    = {5},
  pages     = {266--268},
  year      = {2015},
  publisher = {Elsevier},
  address   = {London},
  issn      = {1360-1385},
  doi       = {10.1016/j.tplants.2015.02.002},
  abstract  = {Recent analyses have demonstrated that plant metabolic networks do not differ in their structural properties and that genes involved in basic metabolic processes show smaller coexpression than genes involved in specialized metabolism. By contrast, our analysis reveals differences in the structure of plant metabolic networks and patterns of coexpression for genes in (non)specialized metabolism. Here we caution that conclusions concerning the organization of plant metabolism based on network-driven analyses strongly depend on the computational approaches used.},
  language  = {en},
}

% Article-number-only journal: the exported "pages" value was the page count
% (12), now stored in "pagetotal".
% NOTE(review): "pages" now carries the article number taken from the DOI
% suffix (srep15954) -- confirm against the publisher record.
% Duplicate "series" field removed; publisher abbreviation expanded.
@article{BordagKlieJuerchottetal.2015,
  author    = {Bordag, Natalie and Klie, Sebastian and J{\"u}rchott, Kathrin and Vierheller, Janine and Schiewe, Hajo and Albrecht, Valerie and Tonn, J{\"o}rg-Christian and Schwartz, Christoph and Schichor, Christian and Selbig, Joachim},
  title     = {Glucocorticoid (dexamethasone)-induced metabolome changes in healthy males suggest prediction of response and side effects},
  journal   = {Scientific reports},
  volume    = {5},
  pages     = {15954},
  pagetotal = {12},
  year      = {2015},
  publisher = {Nature Publishing Group},
  address   = {London},
  issn      = {2045-2322},
  doi       = {10.1038/srep15954},
  abstract  = {Glucocorticoids are indispensable anti-inflammatory and decongestant drugs with high prevalence of use at $\sim$0.9\% of the adult population. Better holistic insights into glucocorticoid-induced changes are crucial for effective use as concurrent medication and management of adverse effects. The profiles of 214 metabolites from plasma of 20 male healthy volunteers were recorded prior to and after ingestion of a single dose of 4 mg dexamethasone (+20 mg pantoprazole). Samples were drawn at three predefined time points per day: seven untreated (day 1 midday - day 3 midday) and four treated (day 3 evening - day 4 evening) per volunteer. Statistical analysis revealed tremendous impact of dexamethasone on the metabolome with 150 of 214 metabolites being significantly deregulated on at least one time point after treatment (ANOVA, Benjamini-Hochberg corrected, $q < 0.05$). Inter-person variability was high and remained uninfluenced by treatment. The clearly visible circadian rhythm prior to treatment was almost completely suppressed and deregulated by dexamethasone. The results draw a holistic picture of the severe metabolic deregulation induced by single-dose, short-term glucocorticoid application. The observed metabolic changes suggest a potential for early detection of severe side effects, raising hope for personalized early countermeasures increasing quality of life and reducing health care costs.},
  language  = {en},
}

% PhD thesis. "102 S." was a page count with a German unit label and is now
% stored as "pagetotal". Title typo "hight-throughput" corrected.
% NOTE(review): "school" is a required field that the export lacked; the value
% below is inferred from address = Potsdam -- confirm the degree-granting
% institution.
@phdthesis{Klie2011,
  author    = {Klie, Sebastian},
  title     = {Integrative analysis of high-throughput ``omics''-data and structured biological knowledge},
  school    = {Universit{\"a}t Potsdam},
  address   = {Potsdam},
  pagetotal = {102},
  year      = {2011},
  language  = {en},
}

% Article-number-only journal: the exported "pages" value was the page count
% (10), now stored in "pagetotal".
% NOTE(review): "pages" now carries the e-locator derived from the DOI suffix
% (journal.pone.0062974) -- confirm against the publisher record.
% Duplicate "series" field removed; city name spelling corrected.
@article{OmranianKlieMuellerRoeberetal.2013,
  author    = {Omranian, Nooshin and Klie, Sebastian and M{\"u}ller-R{\"o}ber, Bernd and Nikoloski, Zoran},
  title     = {Network-based segmentation of biological multivariate time series},
  journal   = {PLoS one},
  volume    = {8},
  number    = {5},
  pages     = {e62974},
  pagetotal = {10},
  year      = {2013},
  publisher = {PLoS},
  address   = {San Francisco},
  issn      = {1932-6203},
  doi       = {10.1371/journal.pone.0062974},
  abstract  = {Molecular phenotyping technologies (e.g., transcriptomics, proteomics, and metabolomics) offer the possibility to simultaneously obtain multivariate time series (MTS) data from different levels of information processing and metabolic conversions in biological systems. As a result, MTS data capture the dynamics of biochemical processes and components whose couplings may involve different scales and exhibit temporal changes. Therefore, it is important to develop methods for determining the time segments in MTS data, which may correspond to critical biochemical events reflected in the coupling of the system's components. Here we provide a novel network-based formalization of the MTS segmentation problem based on temporal dependencies and the covariance structure of the data. We demonstrate that the problem of partitioning MTS data into k segments to maximize a distance function, operating on polynomially computable network properties, often used in analysis of biological network, can be efficiently solved. To enable biological interpretation, we also propose a breakpoint-penalty (BP-penalty) formulation for determining MTS segmentation which combines a distance function with the number/length of segments. Our empirical analyses of synthetic benchmark data as well as time-resolved transcriptomics data from the metabolic and cell cycles of Saccharomyces cerevisiae demonstrate that the proposed method accurately infers the phases in the temporal compartmentalization of biological processes. In addition, through comparison on the same data sets, we show that the results from the proposed formalization of the MTS segmentation problem match biological knowledge and provide more rigorous statistical support in comparison to the contending state-of-the-art methods.},
  language  = {en},
}