@article{CordobaTongBurgosetal.2023,
  author    = {C{\'o}rdoba, Sandra Correa and Tong, Hao and Burgos, Asdrubal and Zhu, Feng and Alseekh, Saleh and Fernie, Alisdair R. and Nikoloski, Zoran},
  title     = {Identification of gene function based on models capturing natural variability of {Arabidopsis} thaliana lipid metabolism},
  journal   = {Nature Communications},
  volume    = {14},
  number    = {1},
  publisher = {Springer Nature},
  address   = {London},
  issn      = {2041-1723},
  doi       = {10.1038/s41467-023-40644-9},
  pagetotal = {12},
  year      = {2023},
  abstract  = {The use of automated tools to reconstruct lipid metabolic pathways is not warranted in plants. Here, the authors construct Plant Lipid Module for Arabidopsis rosette using constraint-based modeling, demonstrate its integration in other plant metabolic models, and use it to dissect the genetic architecture of lipid metabolism. Lipids play fundamental roles in regulating agronomically important traits. Advances in plant lipid metabolism have until recently largely been based on reductionist approaches, although modulation of its components can have system-wide effects. However, existing models of plant lipid metabolism provide lumped representations, hindering detailed study of component modulation. Here, we present the Plant Lipid Module (PLM) which provides a mechanistic description of lipid metabolism in the Arabidopsis thaliana rosette. We demonstrate that the PLM can be readily integrated in models of A. thaliana Col-0 metabolism, yielding accurate predictions (83\%) of single lethal knock-outs and 75\% concordance between measured transcript and predicted flux changes under extended darkness. Genome-wide associations with fluxes obtained by integrating the PLM in diel condition- and accession-specific models identify up to 65 candidate genes modulating A. thaliana lipid metabolism. Using mutant lines, we validate up to 40\% of the candidates, paving the way for identification of metabolic gene function based on models capturing natural variability in metabolism.},
  language  = {en}
}
@article{ArendZimmerXuetal.2023,
  author    = {Arend, Marius and Zimmer, David and Xu, Rudan and Sommer, Frederik and M{\"u}hlhaus, Timo and Nikoloski, Zoran},
  title     = {Proteomics and constraint-based modelling reveal enzyme kinetic properties of {Chlamydomonas} reinhardtii on a genome scale},
  journal   = {Nature Communications},
  volume    = {14},
  number    = {1},
  publisher = {Springer Nature},
  address   = {London},
  issn      = {2041-1723},
  doi       = {10.1038/s41467-023-40498-1},
  pagetotal = {9},
  year      = {2023},
  abstract  = {Metabolic engineering of microalgae offers a promising solution for sustainable biofuel production, and rational design of engineering strategies can be improved by employing metabolic models that integrate enzyme turnover numbers. However, the coverage of turnover numbers for Chlamydomonas reinhardtii, a model eukaryotic microalga accessible to metabolic engineering, is 17-fold smaller compared to the heterotrophic cell factory Saccharomyces cerevisiae. Here we generate quantitative protein abundance data of Chlamydomonas covering 2337 to 3708 proteins in various growth conditions to estimate in vivo maximum apparent turnover numbers. Using constrained-based modeling we provide proxies for in vivo turnover numbers of 568 reactions, representing a 10-fold increase over the in vitro data for Chlamydomonas. Integration of the in vivo estimates instead of in vitro values in a metabolic model of Chlamydomonas improved the accuracy of enzyme usage predictions. Our results help in extending the knowledge on uncharacterized enzymes and improve biotechnological applications of Chlamydomonas.},
  language  = {en}
}
@article{OmranianAngeleskaNikoloski2021,
  author    = {Omranian, Sara and Angeleska, Angela and Nikoloski, Zoran},
  title     = {{PC2P}: parameter-free network-based prediction of protein complexes},
  journal   = {Bioinformatics},
  volume    = {37},
  number    = {1},
  publisher = {Oxford University Press},
  address   = {Oxford},
  issn      = {1367-4811},
  doi       = {10.1093/bioinformatics/btaa1089},
  pages     = {73--81},
  year      = {2021},
  abstract  = {Motivation: Prediction of protein complexes from protein-protein interaction (PPI) networks is an important problem in systems biology, as they control different cellular functions. The existing solutions employ algorithms for network community detection that identify dense subgraphs in PPI networks. However, gold standards in yeast and human indicate that protein complexes can also induce sparse subgraphs, introducing further challenges in protein complex prediction. Results: To address this issue, we formalize protein complexes as biclique spanned subgraphs, which include both sparse and dense subgraphs. We then cast the problem of protein complex prediction as a network partitioning into biclique spanned subgraphs with removal of minimum number of edges, called coherent partition. Since finding a coherent partition is a computationally intractable problem, we devise a parameter-free greedy approximation algorithm, termed Protein Complexes from Coherent Partition (PC2P), based on key properties of biclique spanned subgraphs. Through comparison with nine contenders, we demonstrate that PC2P: (i) successfully identifies modular structure in networks, as a prerequisite for protein complex prediction, (ii) outperforms the existing solutions with respect to a composite score of five performance measures on 75\% and 100\% of the analyzed PPI networks and gold standards in yeast and human, respectively, and (iii,iv) does not compromise GO semantic similarity and enrichment score of the predicted protein complexes. Therefore, our study demonstrates that clustering of networks in terms of biclique spanned subgraphs is a promising framework for detection of complexes in PPI networks.},
  language  = {en}
}
@article{MbebiBreitlerBordeauxetal.2022,
  author    = {Mbebi, Alain J. and Breitler, Jean-Christophe and Bordeaux, M{\'e}lanie and Sulpice, Ronan and McHale, Marcus and Tong, Hao and Toniutti, Lucile and Castillo, Jonny Alonso and Bertrand, Benoit and Nikoloski, Zoran},
  title     = {A comparative analysis of genomic and phenomic predictions of growth-related traits in 3-way coffee hybrids},
  journal   = {{G3}: Genes, Genomes, Genetics},
  volume    = {12},
  number    = {9},
  publisher = {Genetics Society of America},
  address   = {Pittsburgh, PA},
  issn      = {2160-1836},
  doi       = {10.1093/g3journal/jkac170},
  pagetotal = {11},
  year      = {2022},
  abstract  = {Genomic prediction has revolutionized crop breeding despite remaining issues of transferability of models to unseen environmental conditions and environments. Usage of endophenotypes rather than genomic markers leads to the possibility of building phenomic prediction models that can account, in part, for this challenge. Here, we compare and contrast genomic prediction and phenomic prediction models for 3 growth-related traits, namely, leaf count, tree height, and trunk diameter, from 2 coffee 3-way hybrid populations exposed to a series of treatment-inducing environmental conditions. The models are based on 7 different statistical methods built with genomic markers and ChlF data used as predictors. This comparative analysis demonstrates that the best-performing phenomic prediction models show higher predictability than the best genomic prediction models for the considered traits and environments in the vast majority of comparisons within 3-way hybrid populations. In addition, we show that phenomic prediction models are transferrable between conditions but to a lower extent between populations and we conclude that chlorophyll a fluorescence data can serve as alternative predictors in statistical models of coffee hybrid performance. Future directions will explore their combination with other endophenotypes to further improve the prediction of growth-related traits for crops.},
  language  = {en}
}
@article{CalderanRodriguesLuzarowskiMonteBelloetal.2021,
  author    = {Calderan-Rodrigues, Maria Juliana and Luzarowski, Marcin and Monte-Bello, Carolina Cassano and Minen, Romina Ines and Z{\"u}hlke, Boris M. and Nikoloski, Zoran and Skirycz, Aleksandra and Caldana, Camila},
  title     = {Proteogenic dipeptides are characterized by diel fluctuations and target of rapamycin complex-signaling dependency in the model plant {Arabidopsis} thaliana},
  journal   = {Frontiers in Plant Science},
  volume    = {12},
  publisher = {Frontiers Media},
  address   = {Lausanne},
  issn      = {1664-462X},
  doi       = {10.3389/fpls.2021.758933},
  pagetotal = {15},
  year      = {2021},
  abstract  = {As autotrophic organisms, plants capture light energy to convert carbon dioxide into ATP, nicotinamide adenine dinucleotide phosphate (NADPH), and sugars, which are essential for the biosynthesis of building blocks, storage, and growth. At night, metabolism and growth can be sustained by mobilizing carbon (C) reserves. In response to changing environmental conditions, such as light-dark cycles, the small-molecule regulation of enzymatic activities is critical for reprogramming cellular metabolism. We have recently demonstrated that proteogenic dipeptides, protein degradation products, act as metabolic switches at the interface of proteostasis and central metabolism in both plants and yeast. Dipeptides accumulate in response to the environmental changes and act via direct binding and regulation of critical enzymatic activities, enabling C flux distribution. Here, we provide evidence pointing to the involvement of dipeptides in the metabolic rewiring characteristics for the day-night cycle in plants. Specifically, we measured the abundance of 13 amino acids and 179 dipeptides over short- (SD) and long-day (LD) diel cycles, each with different light intensities. Of the measured dipeptides, 38 and eight were characterized by day-night oscillation in SD and LD, respectively, reaching maximum accumulation at the end of the day and then gradually falling in the night. Not only the number of dipeptides, but also the amplitude of the oscillation was higher in SD compared with LD conditions. Notably, rhythmic dipeptides were enriched in the glucogenic amino acids that can be converted into glucose. Considering the known role of Target of Rapamycin (TOR) signaling in regulating both autophagy and metabolism, we subsequently investigated whether diurnal fluctuations of dipeptides levels are dependent on the TOR Complex (TORC).
The Raptor1b mutant (raptor1b), known for the substantial reduction of TOR kinase activity, was characterized by the augmented accumulation of dipeptides, which is especially pronounced under LD conditions. We were particularly intrigued by the group of 16 dipeptides, which, based on their oscillation under SD conditions and accumulation in raptor1b, can be associated with limited C availability or photoperiod. By mining existing protein-metabolite interaction data, we delineated putative protein interactors for a representative dipeptide Pro-Gln. The obtained list included enzymes of C and amino acid metabolism, which are also linked to the TORC-mediated metabolic network. Based on the obtained results, we speculate that the diurnal accumulation of dipeptides contributes to its metabolic adaptation in response to changes in C availability. We hypothesize that dipeptides would act as alternative respiratory substrates and by directly modulating the activity of the focal enzymes.},
  language  = {en}
}
@article{HussJuddKoperetal.2022,
  author    = {Hu{\ss}, Sebastian and Judd, Rika Siedah and Koper, Kaan and Maeda, Hiroshi A. and Nikoloski, Zoran},
  title     = {An automated workflow that generates atom mappings for large-scale metabolic models and its application to {Arabidopsis} thaliana},
  journal   = {The Plant Journal},
  volume    = {111},
  number    = {5},
  publisher = {Wiley-Blackwell},
  address   = {Oxford [u.a.]},
  issn      = {0960-7412},
  doi       = {10.1111/tpj.15903},
  pages     = {1486--1500},
  year      = {2022},
  abstract  = {Quantification of reaction fluxes of metabolic networks can help us understand how the integration of different metabolic pathways determines cellular functions. Yet, intracellular fluxes cannot be measured directly but are estimated with metabolic flux analysis (MFA), which relies on the patterns of isotope labeling of metabolites in the network. The application of MFA also requires a stoichiometric model with atom mappings that are currently not available for the majority of large-scale metabolic network models, particularly of plants. While automated approaches such as the Reaction Decoder Toolkit (RDT) can produce atom mappings for individual reactions, tracing the flow of individual atoms of the entire reactions across a metabolic model remains challenging. Here we establish an automated workflow to obtain reliable atom mappings for large-scale metabolic models by refining the outcome of RDT, and apply the workflow to metabolic models of Arabidopsis thaliana. We demonstrate the accuracy of RDT through a comparative analysis with atom mappings from a large database of biochemical reactions, MetaCyc. We further show the utility of our automated workflow by simulating $^{15}$N isotope enrichment and identifying nitrogen (N)-containing metabolites which show enrichment patterns that are informative for flux estimation in future $^{15}$N-MFA studies of A. thaliana. The automated workflow established in this study can be readily expanded to other species for which metabolic models have been established and the resulting atom mappings will facilitate MFA and graph-theoretic structural analyses with large-scale metabolic networks.},
  language  = {en}
}
@article{OmranianAngeleskaNikoloski2021b,
  author    = {Omranian, Sara and Angeleska, Angela and Nikoloski, Zoran},
  title     = {Efficient and accurate identification of protein complexes from protein-protein interaction networks based on the clustering coefficient},
  journal   = {Computational and Structural Biotechnology Journal},
  volume    = {19},
  publisher = {Elsevier},
  address   = {Amsterdam},
  issn      = {2001-0370},
  doi       = {10.1016/j.csbj.2021.09.014},
  pages     = {5255--5263},
  year      = {2021},
  abstract  = {Identification of protein complexes from protein-protein interaction (PPI) networks is a key problem in PPI mining, solved by parameter-dependent approaches that suffer from small recall rates. Here we introduce GCC-v, a family of efficient, parameter-free algorithms to accurately predict protein complexes using the (weighted) clustering coefficient of proteins in PPI networks. Through comparative analyses with gold standards and PPI networks from Escherichia coli, Saccharomyces cerevisiae, and Homo sapiens, we demonstrate that GCC-v outperforms twelve state-of-the-art approaches for identification of protein complexes with respect to twelve performance measures in at least 85.71\% of scenarios. We also show that GCC-v results in the exact recovery of $\sim$35\% of protein complexes in a pan-plant PPI network and discover 144 new protein complexes in Arabidopsis thaliana, with high support from GO semantic similarity. Our results indicate that findings from GCC-v are robust to network perturbations, which has direct implications to assess the impact of the PPI network quality on the predicted protein complexes. (C) 2021 The Author(s). Published by Elsevier B.V. on behalf of Research Network of Computational and Structural Biotechnology.},
  language  = {en}
}
@article{TongNankarLiuetal.2022,
  author    = {Tong, Hao and Nankar, Amol N. and Liu, Jintao and Todorova, Velichka and Ganeva, Daniela and Grozeva, Stanislava and Tringovska, Ivanka and Pasev, Gancho and Radeva-Ivanova, Vesela and Gechev, Tsanko and Kostova, Dimitrina and Nikoloski, Zoran},
  title     = {Genomic prediction of morphometric and colorimetric traits in {Solanaceous} fruits},
  journal   = {Horticulture Research},
  volume    = {9},
  publisher = {Oxford University Press},
  address   = {Cary},
  issn      = {2052-7276},
  doi       = {10.1093/hr/uhac072},
  pagetotal = {11},
  year      = {2022},
  abstract  = {Selection of high-performance lines with respect to traits of interest is a key step in plant breeding. Genomic prediction allows to determine the genomic estimated breeding values of unseen lines for trait of interest using genetic markers, e.g. single-nucleotide polymorphisms (SNPs), and machine learning approaches, which can therefore shorten breeding cycles, referring to genomic selection (GS). Here, we applied GS approaches in two populations of Solanaceous crops, i.e. tomato and pepper, to predict morphometric and colorimetric traits. The traits were measured by using scoring-based conventional descriptors (CDs) as well as by Tomato Analyzer (TA) tool using the longitudinally and latitudinally cut fruit images. The GS performance was assessed in cross-validations of classification-based and regression-based machine learning models for CD and TA traits, respectively. The results showed the usage of TA traits and tag SNPs provide a powerful combination to predict morphology and color-related traits of Solanaceous fruits. The highest predictability of 0.89 was achieved for fruit width in pepper, with an average predictability of 0.69 over all traits. The multi-trait GS models are of slightly better predictability than single-trait models for some colorimetric traits in pepper. While model validation performs poorly on wild tomato accessions, the usage as many as one accession per wild species in the training set can increase the transferability of models to unseen populations for some traits (e.g. fruit shape for which predictability in unseen scenario increased from zero to 0.6). Overall, GS approaches can assist the selection of high-performance Solanaceous fruits in crop breeding.},
  language  = {en}
}
@article{AngeleskaOmranianNikoloski2021,
  author    = {Angeleska, Angela and Omranian, Sara and Nikoloski, Zoran},
  title     = {Coherent network partitions},
  journal   = {Theoretical Computer Science},
  volume    = {894},
  publisher = {Elsevier},
  address   = {Amsterdam [u.a.]},
  issn      = {0304-3975},
  doi       = {10.1016/j.tcs.2021.10.002},
  pages     = {3--11},
  year      = {2021},
  abstract  = {We continue to study coherent partitions of graphs whereby the vertex set is partitioned into subsets that induce biclique spanned subgraphs. The problem of identifying the minimum number of edges to obtain biclique spanned connected components (CNP), called the coherence number, is NP-hard even on bipartite graphs. Here, we propose a graph transformation geared towards obtaining an $O(\log n)$-approximation algorithm for the CNP on a bipartite graph with $n$ vertices. The transformation is inspired by a new characterization of biclique spanned subgraphs. In addition, we study coherent partitions on prime graphs, and show that finding coherent partitions reduces to the problem of finding coherent partitions in a prime graph. Therefore, these results provide future directions for approximation algorithms for the coherence number of a given graph.},
  language  = {en}
}
@article{WenderingNikoloski2022,
  author    = {Wendering, Philipp and Nikoloski, Zoran},
  title     = {{COMMIT}: Consideration of metabolite leakage and community composition improves microbial community reconstructions},
  journal   = {{PLoS} Computational Biology},
  volume    = {18},
  number    = {3},
  publisher = {Public Library of Science},
  address   = {San Francisco},
  issn      = {1553-734X},
  doi       = {10.1371/journal.pcbi.1009906},
  pagetotal = {24},
  year      = {2022},
  abstract  = {Composition and functions of microbial communities affect important traits in diverse hosts, from crops to humans. Yet, mechanistic understanding of how metabolism of individual microbes is affected by the community composition and metabolite leakage is lacking. Here, we first show that the consensus of automatically generated metabolic reconstructions improves the quality of the draft reconstructions, measured by comparison to reference models. We then devise an approach for gap filling, termed COMMIT, that considers metabolites for secretion based on their permeability and the composition of the community. By applying COMMIT with two soil communities from the Arabidopsis thaliana culture collection, we could significantly reduce the gap-filling solution in comparison to filling gaps in individual reconstructions without affecting the genomic support. Inspection of the metabolic interactions in the soil communities allows us to identify microbes with community roles of helpers and beneficiaries. Therefore, COMMIT offers a versatile fully automated solution for large-scale modelling of microbial communities for diverse biotechnological applications. Author summary: Microbial communities are important in ecology, human health, and crop productivity. However, detailed information on the interactions within natural microbial communities is hampered by the community size, lack of detailed information on the biochemistry of single organisms, and the complexity of interactions between community members. Metabolic models are comprised of biochemical reaction networks based on the genome annotation, and can provide mechanistic insights into community functions. Previous analyses of microbial community models have been performed with high-quality reference models or models generated using a single reconstruction pipeline.
However, these models do not contain information on the composition of the community that determines the metabolites exchanged between the community members. In addition, the quality of metabolic models is affected by the reconstruction approach used, with direct consequences on the inferred interactions between community members. Here, we use fully automated consensus reconstructions from four approaches to arrive at functional models with improved genomic support while considering the community composition. We applied our pipeline to two soil communities from the Arabidopsis thaliana culture collection, providing only genome sequences. Finally, we show that the obtained models have 90\% genomic support and demonstrate that the derived interactions are corroborated by independent computational predictions.},
  language  = {en}
}