@article{BarlowHartmannGonzalezetal.2020, author = {Barlow, Axel and Hartmann, Stefanie and Gonzalez, Javier and Hofreiter, Michael and Paijmans, Johanna L. A.}, title = {Consensify}, series = {Genes / Molecular Diversity Preservation International}, volume = {11}, journal = {Genes / Molecular Diversity Preservation International}, number = {1}, publisher = {MDPI}, address = {Basel}, issn = {2073-4425}, doi = {10.3390/genes11010050}, pages = {22}, year = {2020}, abstract = {A standard practice in palaeogenome analysis is the conversion of mapped short read data into pseudohaploid sequences, frequently by selecting a single high-quality nucleotide at random from the stack of mapped reads. This controls for biases due to differential sequencing coverage, but it does not control for differential rates and types of sequencing error, which are frequently large and variable in datasets obtained from ancient samples. These errors have the potential to distort phylogenetic and population clustering analyses, and to mislead tests of admixture using D statistics. We introduce Consensify, a method for generating pseudohaploid sequences, which controls for biases resulting from differential sequencing coverage while greatly reducing error rates. The error correction is derived directly from the data itself, without the requirement for additional genomic resources or simplifying assumptions such as contemporaneous sampling. For phylogenetic and population clustering analysis, we find that Consensify is less affected by artefacts than methods based on single read sampling. For D statistics, Consensify is more resistant to false positives and appears to be less affected by biases resulting from different laboratory protocols than other frequently used methods. Although Consensify is developed with palaeogenomic data in mind, it is applicable to any low to medium coverage short read datasets. We predict that Consensify will be a useful tool for future studies of palaeogenomes.}, language = {en} } @article{ChristopherAshwoodBittremieuxDeutschetal.2020, author = {Ashwood, Christopher and Bittremieux, Wout and Deutsch, Eric W. and Doncheva, Nadezhda T. and Dorfer, Viktoria and Gabriels, Ralf and Gorshkov, Vladimir and Gupta, Surya and Jones, Andrew R. and K{\"a}ll, Lukas and Kopczynski, Dominik and Lane, Lydie and Lautenbacher, Ludwig and Legeay, Marc and Locard-Paulet, Marie and Mesuere, Bart and Sachsenberg, Timo and Salz, Renee and Samaras, Patroklos and Schiebenhoefer, Henning and Schmidt, Tobias and Schw{\"a}mmle, Veit and Soggiu, Alessio and Uszkoreit, Julian and Van Den Bossche, Tim and Van Puyvelde, Bart and Van Strien, Joeri and Verschaffelt, Pieter and Webel, Henry and Willems, Sander and Perez-Riverol, Yasset and Netz, Eugen and Pfeuffer, Julianus}, title = {Proceedings of the EuBIC-MS 2020 Developers' Meeting}, series = {EuPA Open Proteomics}, volume = {24}, journal = {EuPA Open Proteomics}, publisher = {Elsevier}, address = {Amsterdam}, issn = {2212-9685}, doi = {10.1016/j.euprot.2020.11.001}, pages = {1 -- 6}, year = {2020}, abstract = {The 2020 European Bioinformatics Community for Mass Spectrometry (EuBIC-MS) Developers' meeting was held from January 13th to January 17th 2020 in Nyborg, Denmark. Among the participants were scientists as well as developers working in the field of computational mass spectrometry (MS) and proteomics. The 4-day program was split between introductory keynote lectures and parallel hackathon sessions.
During the latter, the participants developed bioinformatics tools and resources addressing outstanding needs in the community. The hackathons allowed less experienced participants to learn from more advanced computational MS experts, and to actively contribute to highly relevant research projects. We successfully produced several new tools that will be useful to the proteomics community by improving data analysis as well as facilitating future research. All keynote recordings are available on https://doi.org/10.5281/zenodo.3890181.}, language = {en} } @article{LuoChenZengetal.2018, author = {Luo, Ting and Chen, Xiaoyi and Zeng, Shufei and Guan, Baozhang and Hu, Bo and Meng, Yu and Liu, Fanna and Wong, Taksui and Lu, Yongpin and Yun, Chen and Hocher, Berthold and Yin, Lianghong}, title = {Bioinformatic identification of key genes and analysis of prognostic values in clear cell renal cell carcinoma}, series = {Oncology Letters}, volume = {16}, journal = {Oncology Letters}, number = {2}, publisher = {Spandidos Publications}, address = {Athens}, issn = {1792-1074}, doi = {10.3892/ol.2018.8842}, pages = {1747 -- 1757}, year = {2018}, abstract = {The present study aimed to identify new key genes as potential biomarkers for the diagnosis, prognosis or targeted therapy of clear cell renal cell carcinoma (ccRCC). Three expression profiles (GSE36895, GSE46699 and GSE71963) were collected from Gene Expression Omnibus. GEO2R was used to identify differentially expressed genes (DEGs) in ccRCC tissues and normal samples. The Database for Annotation, Visualization and Integrated Discovery was utilized for functional and pathway enrichment analysis. STRING v10.5 and Molecular Complex Detection were used for protein-protein interaction (PPI) network construction and module analysis, respectively. Regulation network analyses were performed with the WebGestalt tool. The UALCAN web portal was used for expression validation and survival analysis of hub genes in ccRCC patients from The Cancer Genome Atlas (TCGA). A total of 65 up- and 164 downregulated genes were identified as DEGs. DEGs were enriched with functional terms and pathways closely related to ccRCC pathogenesis. Seventeen hub genes and one significant module were filtered out and selected from the PPI network. The differential expression of hub genes was verified in TCGA patients. A Kaplan-Meier plot showed that high mRNA expression of enolase 2 (ENO2) was associated with short overall survival in ccRCC patients (P=0.023). High mRNA expression of cyclin D1 (CCND1) (P<0.001), fms related tyrosine kinase 1 (FLT1) (P=0.004), plasminogen (PLG) (P<0.001) and von Willebrand factor (VWF) (P=0.008) appeared to serve as favorable factors in survival. These findings indicate that the DEGs may be key genes in ccRCC pathogenesis and five genes, including ENO2, CCND1, FLT1, PLG and VWF, may serve as potential prognostic biomarkers in ccRCC.}, language = {en} } @article{FriouxSchaubSchellhornetal.2019, author = {Frioux, Cl{\'e}mence and Schaub, Torsten H. and Schellhorn, Sebastian and Siegel, Anne and Wanko, Philipp}, title = {Hybrid metabolic network completion}, series = {Theory and practice of logic programming}, volume = {19}, journal = {Theory and practice of logic programming}, number = {1}, publisher = {Cambridge University Press}, address = {New York}, issn = {1471-0684}, doi = {10.1017/S1471068418000455}, pages = {83 -- 108}, year = {2019}, abstract = {Metabolic networks play a crucial role in biology since they capture all chemical reactions in an organism.
While there are networks of high quality for many model organisms, networks for less studied organisms are often of poor quality and suffer from incompleteness. To this end, we introduced in previous work an answer set programming (ASP)-based approach to metabolic network completion. Although this qualitative approach allows for restoring moderately degraded networks, it fails to restore highly degraded ones. This is because it ignores quantitative constraints capturing reaction rates. To address this problem, we propose a hybrid approach to metabolic network completion that integrates our qualitative ASP approach with quantitative means for capturing reaction rates. We begin by formally reconciling existing stoichiometric and topological approaches to network completion in a unified formalism. With it, we develop a hybrid ASP encoding and rely upon the theory reasoning capacities of the ASP system dingo for solving the resulting logic program with linear constraints over reals. We empirically evaluate our approach by means of the metabolic network of Escherichia coli. Our analysis shows that our novel approach yields results greatly superior to those obtainable from purely qualitative or quantitative approaches.}, language = {en} } @article{KuckelkornStueblerTextorisTaubeetal.2019, author = {Kuckelkorn, Ulrike and St{\"u}bler, Sabine and Textoris-Taube, Kathrin and Kilian, Christiane and Niewienda, Agathe and Henklein, Petra and Janek, Katharina and Stumpf, Michael P. H. and Mishto, Michele and Liepe, Juliane}, title = {Proteolytic dynamics of human 20S thymoproteasome}, series = {The journal of biological chemistry}, volume = {294}, journal = {The journal of biological chemistry}, number = {19}, publisher = {American Society for Biochemistry and Molecular Biology}, address = {Bethesda}, issn = {1083-351X}, doi = {10.1074/jbc.RA118.007347}, pages = {7740 -- 7754}, year = {2019}, abstract = {Efficient immunosurveillance by CD8(+) T cells in the periphery depends on positive/negative selection of thymocytes and thus on the dynamics of antigen degradation and epitope production by thymoproteasome and immunoproteasome in the thymus. Although studies in mouse systems have shown how thymoproteasome activity differs from that of immunoproteasome and strongly impacts the T cell repertoire, the proteolytic dynamics and the regulation of human thymoproteasome are unknown. By combining biochemical and computational modeling approaches, we show here that human 20S thymoproteasome and immunoproteasome differ not only in the proteolytic activity of the catalytic sites but also in the peptide transport. These differences impinge upon the quantity of peptide products rather than where the substrates are cleaved. The comparison of the two human 20S proteasome isoforms depicts different processing of antigens that are associated with tumors and autoimmune diseases.}, language = {en} } @misc{BarlowHartmannGonzalezetal.2020, author = {Barlow, Axel and Hartmann, Stefanie and Gonzalez, Javier and Hofreiter, Michael and Paijmans, Johanna L.
A.}, title = {Consensify}, series = {Postprints der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe}, journal = {Postprints der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe}, number = {1033}, issn = {1866-8372}, doi = {10.25932/publishup-47252}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-472521}, pages = {24}, year = {2020}, abstract = {A standard practice in palaeogenome analysis is the conversion of mapped short read data into pseudohaploid sequences, frequently by selecting a single high-quality nucleotide at random from the stack of mapped reads. This controls for biases due to differential sequencing coverage, but it does not control for differential rates and types of sequencing error, which are frequently large and variable in datasets obtained from ancient samples. These errors have the potential to distort phylogenetic and population clustering analyses, and to mislead tests of admixture using D statistics. We introduce Consensify, a method for generating pseudohaploid sequences, which controls for biases resulting from differential sequencing coverage while greatly reducing error rates. The error correction is derived directly from the data itself, without the requirement for additional genomic resources or simplifying assumptions such as contemporaneous sampling. For phylogenetic and population clustering analysis, we find that Consensify is less affected by artefacts than methods based on single read sampling. For D statistics, Consensify is more resistant to false positives and appears to be less affected by biases resulting from different laboratory protocols than other frequently used methods. Although Consensify is developed with palaeogenomic data in mind, it is applicable to any low to medium coverage short read datasets. We predict that Consensify will be a useful tool for future studies of palaeogenomes.}, language = {en} } @misc{GebserSchaubThieleetal.2011, author = {Gebser, Martin and Schaub, Torsten H. and Thiele, Sven and Veber, Philippe}, title = {Detecting inconsistencies in large biological networks with answer set programming}, series = {Postprints der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe}, journal = {Postprints der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe}, number = {561}, issn = {1866-8372}, doi = {10.25932/publishup-41246}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-412467}, pages = {38}, year = {2011}, abstract = {We introduce an approach to detecting inconsistencies in large biological networks by using answer set programming. To this end, we build upon a recently proposed notion of consistency between biochemical/genetic reactions and high-throughput profiles of cell activity. We then present an approach based on answer set programming to check the consistency of large-scale data sets. Moreover, we extend this methodology to provide explanations for inconsistencies by determining minimal representations of conflicts. In practice, this can be used to identify unreliable data or to indicate missing reactions.}, language = {en} } @phdthesis{RobainaEstevez2017, author = {Robaina Estevez, Semidan}, title = {Context-specific metabolic predictions}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-401365}, school = {Universit{\"a}t Potsdam}, pages = {vi, 158}, year = {2017}, abstract = {All life-sustaining processes are ultimately driven by thousands of biochemical reactions occurring in the cells: the metabolism.
These reactions form an intricate network which produces all required chemical compounds, i.e., metabolites, from a set of input molecules. Cells regulate the activity of metabolic reactions in a context-specific way; only reactions that are required in a cellular context, e.g., cell type, developmental stage or environmental condition, are usually active, while the rest remain inactive. The context-specificity of metabolism can be captured by several kinds of experimental data, such as gene and protein expression or metabolite profiles. In addition, these context-specific data can be assimilated into computational models of metabolism, which then provide context-specific metabolic predictions. This thesis is composed of three individual studies focussing on context-specific experimental data integration into computational models of metabolism. The first study presents an optimization-based method to obtain context-specific metabolic predictions, and offers the advantage of being fully automated, i.e., free of user-defined parameters. The second study explores the effects of alternative optimal solutions arising during the generation of context-specific metabolic predictions. These alternative optimal solutions are metabolic model predictions that represent the integrated data equally well, but that can markedly differ. This study proposes algorithms to analyze the space of alternative solutions, as well as some ways to cope with their impact on the predictions. Finally, the third study investigates the metabolic specialization of the guard cells of the plant Arabidopsis thaliana, and compares it with that of a different cell type, the mesophyll cells. To this end, the computational methods developed in this thesis are applied to obtain metabolic predictions specific to guard cells and mesophyll cells. These cell-specific predictions are then compared to explore the differences in metabolic activity between the two cell types. In addition, the effects of alternative optima are taken into consideration when comparing the two cell types. The computational results indicate a major reorganization of the primary metabolism in guard cells. These results are supported by an independent 13C labelling experiment.}, language = {en} } @article{GebserSchaubThieleetal.2011, author = {Gebser, Martin and Schaub, Torsten H. and Thiele, Sven and Veber, Philippe}, title = {Detecting inconsistencies in large biological networks with answer set programming}, series = {Theory and practice of logic programming}, volume = {11}, journal = {Theory and practice of logic programming}, number = {5-6}, publisher = {Cambridge University Press}, address = {New York}, issn = {1471-0684}, doi = {10.1017/S1471068410000554}, pages = {323 -- 360}, year = {2011}, abstract = {We introduce an approach to detecting inconsistencies in large biological networks by using answer set programming. To this end, we build upon a recently proposed notion of consistency between biochemical/genetic reactions and high-throughput profiles of cell activity. We then present an approach based on answer set programming to check the consistency of large-scale data sets. Moreover, we extend this methodology to provide explanations for inconsistencies by determining minimal representations of conflicts.
In practice, this can be used to identify unreliable data or to indicate missing reactions.}, language = {en} } @phdthesis{Grimbs2009, author = {Grimbs, Sergio}, title = {Towards structure and dynamics of metabolic networks}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-32397}, school = {Universit{\"a}t Potsdam}, year = {2009}, abstract = {This work presents mathematical and computational approaches to cover various aspects of metabolic network modelling, especially regarding the limited availability of detailed kinetic knowledge on reaction rates. It is shown that precise mathematical formulations of problems are needed i) to find appropriate and, if possible, efficient algorithms to solve them, and ii) to determine the quality of the found approximate solutions. Furthermore, some means are introduced to gain insights into dynamic properties of metabolic networks either directly from the network structure or by additionally incorporating steady-state information. Finally, an approach to identify key reactions in a metabolic network is introduced, which helps to develop simple yet useful kinetic models. The rise of novel techniques renders genome sequencing increasingly fast and cheap. In the near future, this will allow the analysis of biological networks not only for species but also for individuals. Hence, automatic reconstruction of metabolic networks presents itself as a means for evaluating this huge amount of experimental data. A mathematical formulation as an optimization problem is presented, taking into account existing knowledge and experimental data as well as the probabilistic predictions of various bioinformatic methods. The reconstructed networks are optimized for having large connected components of high accuracy, hence avoiding fragmentation into small isolated subnetworks. The usefulness of this formalism is exemplified on the reconstruction of the sucrose biosynthesis pathway in Chlamydomonas reinhardtii. The problem is shown to be computationally demanding and therefore necessitates efficient approximation algorithms. The problem of minimal nutrient requirements for genome-scale metabolic networks is analyzed. Given a metabolic network and a set of target metabolites, the inverse scope problem has as its objective determining a minimal set of metabolites that have to be provided in order to produce the target metabolites. These target metabolites might stem from experimental measurements and therefore are known to be produced by the metabolic network under study, or are given as the desired end-products of a biotechnological application. The inverse scope problem is shown to be computationally hard to solve. However, I assume that the complexity strongly depends on the number of directed cycles within the metabolic network. This might guide the development of efficient approximation algorithms. Assuming mass-action kinetics, chemical reaction network theory (CRNT) allows for eliciting conclusions about multistability directly from the structure of metabolic networks. Although CRNT was originally based on mass-action kinetics, it is shown how to incorporate further reaction schemes by emulating molecular enzyme mechanisms. CRNT is used to compare several models of the Calvin cycle, which differ in size and level of abstraction.
Definite results are obtained for small models, but the available set of theorems and algorithms provided by CRNT cannot be applied to larger models due to the computational limitations of the currently available implementations. Given the stoichiometry of a metabolic network together with steady-state fluxes and concentrations, structural kinetic modelling allows one to analyze the dynamic behavior of the metabolic network, even if the explicit rate equations are not known. In particular, this sampling approach is used to study the stabilizing effects of allosteric regulation in a model of human erythrocytes. Furthermore, the reactions of that model can be ranked according to their impact on the stability of the steady state. The most important reactions in that respect are identified as hexokinase, phosphofructokinase and pyruvate kinase, which are known to be highly regulated and almost irreversible. Kinetic modelling approaches using standard rate equations are compared and evaluated against reference models for erythrocytes and hepatocytes. These simplified kinetic models simulate the temporal behavior acceptably for small changes around a given steady state, but fail to capture important characteristics for larger changes. The aforementioned approach to rank reactions according to their influence on stability is used to identify a small number of key reactions. These reactions are modelled in detail, including knowledge about allosteric regulation, while all other reactions are still described by simplified reaction rates. These so-called hybrid models can capture the characteristics of the reference models significantly better than the simplified models alone. The resulting hybrid models might serve as a good starting point for kinetic modelling of genome-scale metabolic networks, as they provide reasonable results in the absence of experimental data regarding, for instance, allosteric regulation for the vast majority of enzymatic reactions.}, language = {en} }