@article{OmranianAngeleskaNikoloski2021,
  author    = {Omranian, Sara and Angeleska, Angela and Nikoloski, Zoran},
  title     = {{PC2P}},
  journal   = {Bioinformatics},
  volume    = {37},
  number    = {1},
  pages     = {73--81},
  year      = {2021},
  publisher = {Oxford University Press},
  address   = {Oxford},
  issn      = {1367-4803},
  doi       = {10.1093/bioinformatics/btaa1089},
  language  = {en},
  abstract  = {Motivation: Prediction of protein complexes from protein-protein interaction (PPI) networks is an important problem in systems biology, as they control different cellular functions. The existing solutions employ algorithms for network community detection that identify dense subgraphs in PPI networks. However, gold standards in yeast and human indicate that protein complexes can also induce sparse subgraphs, introducing further challenges in protein complex prediction. Results: To address this issue, we formalize protein complexes as biclique spanned subgraphs, which include both sparse and dense subgraphs. We then cast the problem of protein complex prediction as a network partitioning into biclique spanned subgraphs with removal of minimum number of edges, called coherent partition. Since finding a coherent partition is a computationally intractable problem, we devise a parameter-free greedy approximation algorithm, termed Protein Complexes from Coherent Partition (PC2P), based on key properties of biclique spanned subgraphs. Through comparison with nine contenders, we demonstrate that PC2P: (i) successfully identifies modular structure in networks, as a prerequisite for protein complex prediction, (ii) outperforms the existing solutions with respect to a composite score of five performance measures on 75\% and 100\% of the analyzed PPI networks and gold standards in yeast and human, respectively, and (iii,iv) does not compromise GO semantic similarity and enrichment score of the predicted protein complexes. Therefore, our study demonstrates that clustering of networks in terms of biclique spanned subgraphs is a promising framework for detection of complexes in PPI networks.}
}
@article{LyallNikoloskiGechev2020,
  author    = {Lyall, Rafe and Nikoloski, Zoran and Gechev, Tsanko},
  title     = {Comparative analysis of {ROS} network genes in extremophile {Eukaryotes}},
  journal   = {International Journal of Molecular Sciences},
  volume    = {21},
  number    = {23},
  pagetotal = {27},
  year      = {2020},
  publisher = {Molecular Diversity Preservation International (MDPI)},
  address   = {Basel},
  issn      = {1422-0067},
  doi       = {10.3390/ijms21239131},
  language  = {en},
  abstract  = {The reactive oxygen species (ROS) gene network, consisting of both ROS-generating and detoxifying enzymes, adjusts ROS levels in response to various stimuli. We performed a cross-kingdom comparison of ROS gene networks to investigate how they have evolved across all Eukaryotes, including protists, fungi, plants and animals. We included the genomes of 16 extremotolerant Eukaryotes to gain insight into ROS gene evolution in organisms that experience extreme stress conditions. Our analysis focused on ROS genes found in all Eukaryotes (such as catalases, superoxide dismutases, glutathione reductases, peroxidases and glutathione peroxidase/peroxiredoxins) as well as those specific to certain groups, such as ascorbate peroxidases, dehydroascorbate/monodehydroascorbate reductases in plants and other photosynthetic organisms. ROS-producing NADPH oxidases (NOX) were found in most multicellular organisms, although several NOX-like genes were identified in unicellular or filamentous species. However, despite the extreme conditions experienced by extremophile species, we found no evidence for expansion of ROS-related gene families in these species compared to other Eukaryotes. Tardigrades and rotifers do show ROS gene expansions that could be related to their extreme lifestyles, although a high rate of lineage-specific horizontal gene transfer events, coupled with recent tetraploidy in rotifers, could explain this observation. This suggests that the basal Eukaryotic ROS scavenging systems are sufficient to maintain ROS homeostasis even under the most extreme conditions.}
}
@article{TongKuekenRazaghiMoghadametal.2021,
  author    = {Tong, Hao and K{\"u}ken, Anika and Razaghi-Moghadam, Zahra and Nikoloski, Zoran},
  title     = {Characterization of effects of genetic variants via genome-scale metabolic modelling},
  journal   = {Cellular and Molecular Life Sciences},
  volume    = {78},
  number    = {12},
  pages     = {5123--5138},
  year      = {2021},
  publisher = {Springer International Publishing AG},
  address   = {Cham},
  issn      = {1420-682X},
  doi       = {10.1007/s00018-021-03844-4},
  language  = {en},
  abstract  = {Genome-scale metabolic networks for model plants and crops in combination with approaches from the constraint-based modelling framework have been used to predict metabolic traits and design metabolic engineering strategies for their manipulation. With the advances in technologies to generate large-scale genotyping data from natural diversity panels and other populations, genome-wide association and genomic selection have emerged as statistical approaches to determine genetic variants associated with and predictive of traits. Here, we review recent advances in constraint-based approaches that integrate genetic variants in genome-scale metabolic models to characterize their effects on reaction fluxes. Since some of these approaches have been applied in organisms other than plants, we provide a critical assessment of their applicability particularly in crops. In addition, we further dissect the inferred effects of genetic variants with respect to reaction rate constants, abundances of enzymes, and concentrations of metabolites, as main determinants of reaction fluxes and relate them with their combined effects on complex traits, like growth. Through this systematic review, we also provide a roadmap for future research to increase the predictive power of statistical approaches by coupling them with mechanistic models of metabolism.}
}
@article{PriesRazaghiMoghadamKopkaetal.2021,
  author    = {Pries, Christopher and Razaghi-Moghadam, Zahra and Kopka, Joachim and Nikoloski, Zoran},
  title     = {Integration of relative metabolomics and transcriptomics time-course data in a metabolic model pinpoints effects of ribosome biogenesis defects on {Arabidopsis} thaliana metabolism},
  journal   = {Scientific Reports},
  volume    = {11},
  number    = {1},
  pagetotal = {12},
  year      = {2021},
  publisher = {Macmillan Publishers Limited, part of Springer Nature},
  address   = {London},
  issn      = {2045-2322},
  doi       = {10.1038/s41598-021-84114-y},
  language  = {en},
  abstract  = {Ribosome biogenesis is tightly associated to plant metabolism due to the usage of ribosomes in the synthesis of proteins necessary to drive metabolic pathways. Given the central role of ribosome biogenesis in cell physiology, it is important to characterize the impact of different components involved in this process on plant metabolism. Double mutants of the Arabidopsis thaliana cytosolic 60S maturation factors REIL1 and REIL2 do not resume growth after shift to moderate 10~$^{\circ}$C chilling conditions. To gain mechanistic insights into the metabolic effects of this ribosome biogenesis defect on metabolism, we developed TC-iReMet2, a constraint-based modelling approach that integrates relative metabolomics and transcriptomics time-course data to predict differential fluxes on a genome-scale level. We employed TC-iReMet2 with metabolomics and transcriptomics data from the Arabidopsis Columbia 0 wild type and the reil1-1 reil2-1 double mutant before and after cold shift. We identified reactions and pathways that are highly altered in a mutant relative to the wild type. These pathways include the Calvin-Benson cycle, photorespiration, gluconeogenesis, and glycolysis. Our findings also indicated differential NAD(P)/NAD(P)H ratios after cold shift. TC-iReMet2 allows for mechanistic hypothesis generation and interpretation of system biology experiments related to metabolic fluxes on a genome-scale level.}
}
@article{SeepRazaghiMoghadamNikoloski2021,
  author    = {Seep, Lea and Razaghi-Moghadam, Zahra and Nikoloski, Zoran},
  title     = {Reaction lumping in metabolic networks for application with thermodynamic metabolic flux analysis},
  journal   = {Scientific Reports},
  volume    = {11},
  number    = {1},
  pagetotal = {11},
  year      = {2021},
  publisher = {Macmillan Publishers Limited, part of Springer Nature},
  address   = {London},
  issn      = {2045-2322},
  doi       = {10.1038/s41598-021-87643-8},
  language  = {en},
  abstract  = {Thermodynamic metabolic flux analysis (TMFA) can narrow down the space of steady-state flux distributions, but requires knowledge of the standard Gibbs free energy for the modelled reactions. The latter are often not available due to unknown Gibbs free energy change of formation, $\Delta_f G^0$, of metabolites. To optimize the usage of data on thermodynamics in constraining a model, reaction lumping has been proposed to eliminate metabolites with unknown $\Delta_f G^0$. However, the lumping procedure has not been formalized nor implemented for systematic identification of lumped reactions. Here, we propose, implement, and test a combined procedure for reaction lumping, applicable to genome-scale metabolic models. It is based on identification of groups of metabolites with unknown $\Delta_f G^0$ whose elimination can be conducted independently of the others via: (1) group implementation, aiming to eliminate an entire such group, and, if this is infeasible, (2) a sequential implementation to ensure that a maximal number of metabolites with unknown $\Delta_f G^0$ are eliminated. Our comparative analysis with genome-scale metabolic models of Escherichia coli, Bacillus subtilis, and Homo sapiens shows that the combined procedure provides an efficient means for systematic identification of lumped reactions. We also demonstrate that TMFA applied to models with reactions lumped according to the proposed procedure lead to more precise predictions in comparison to the original models. The provided implementation thus ensures the reproducibility of the findings and their application with standard TMFA.}
}
@article{MbebiTongNikoloski2021,
  author    = {Mbebi, Alain J. and Tong, Hao and Nikoloski, Zoran},
  title     = {{$L_{2,1}$}-norm regularized multivariate regression model with applications to genomic prediction},
  journal   = {Bioinformatics},
  volume    = {37},
  number    = {18},
  pages     = {2896--2904},
  year      = {2021},
  publisher = {Oxford University Press},
  address   = {Oxford},
  issn      = {1367-4803},
  doi       = {10.1093/bioinformatics/btab212},
  language  = {en},
  abstract  = {Motivation: Genomic selection (GS) is currently deemed the most effective approach to speed up breeding of agricultural varieties. It has been recognized that consideration of multiple traits in GS can improve accuracy of prediction for traits of low heritability. However, since GS forgoes statistical testing with the idea of improving predictions, it does not facilitate mechanistic understanding of the contribution of particular single nucleotide polymorphisms (SNP). Results: Here, we propose a $L_{2,1}$-norm regularized multivariate regression model and devise a fast and efficient iterative optimization algorithm, called $L_{2,1}$-joint, applicable in multi-trait GS. The usage of the $L_{2,1}$-norm facilitates variable selection in a penalized multivariate regression that considers the relation between individuals, when the number of SNPs is much larger than the number of individuals. The capacity for variable selection allows us to define master regulators that can be used in a multi-trait GS setting to dissect the genetic architecture of the analyzed traits. Our comparative analyses demonstrate that the proposed model is a favorable candidate compared to existing state-of-the-art approaches. Prediction and variable selection with datasets from Brassica napus, wheat and Arabidopsis thaliana diversity panels are conducted to further showcase the performance of the proposed model.}
}
@article{KuekenWenderingLangaryetal.2021,
  author    = {K{\"u}ken, Anika and Wendering, Philipp and Langary, Damoun and Nikoloski, Zoran},
  title     = {A structural property for reduction of biochemical networks},
  journal   = {Scientific Reports},
  volume    = {11},
  number    = {1},
  pagetotal = {11},
  year      = {2021},
  publisher = {Macmillan Publishers Limited, part of Springer Nature},
  address   = {London},
  issn      = {2045-2322},
  doi       = {10.1038/s41598-021-96835-1},
  language  = {en},
  abstract  = {Large-scale biochemical models are of increasing sizes due to the consideration of interacting organisms and tissues. Model reduction approaches that preserve the flux phenotypes can simplify the analysis and predictions of steady-state metabolic phenotypes. However, existing approaches either restrict functionality of reduced models or do not lead to significant decreases in the number of modelled metabolites. Here, we introduce an approach for model reduction based on the structural property of balancing of complexes that preserves the steady-state fluxes supported by the network and can be efficiently determined at genome scale. Using two large-scale mass-action kinetic models of Escherichia coli, we show that our approach results in a substantial reduction of 99\% of metabolites. Applications to genome-scale metabolic models across kingdoms of life result in up to 55\% and 85\% reduction in the number of metabolites when arbitrary and mass-action kinetics is assumed, respectively. We also show that predictions of the specific growth rate from the reduced models match those based on the original models. Since steady-state flux phenotypes from the original model are preserved in the reduced, the approach paves the way for analysing other metabolic phenotypes in large-scale biochemical networks.}
}
@article{NowakGennermannPerssonetal.2020,
  author    = {Nowak, Jacqueline and Gennermann, Kristin and Persson, Staffan and Nikoloski, Zoran},
  title     = {{CytoSeg} 2.0},
  journal   = {Bioinformatics},
  volume    = {36},
  number    = {9},
  pages     = {2950--2951},
  year      = {2020},
  publisher = {Oxford University Press},
  address   = {Oxford},
  issn      = {1367-4803},
  doi       = {10.1093/bioinformatics/btaa035},
  language  = {en},
  abstract  = {Motivation: Actin filaments (AFs) are dynamic structures that substantially change their organization over time. The dynamic behavior and the relatively low signal-to-noise ratio during live-cell imaging have rendered the quantification of the actin organization a difficult task. Results: We developed an automated image-based framework that extracts AFs from fluorescence microscopy images and represents them as networks, which are automatically analyzed to identify and compare biologically relevant features. Although the source code is freely available, we have now implemented the framework into a graphical user interface that can be installed as a Fiji plugin, thus enabling easy access by the research community.}
}
@article{WenderingNikoloski2022,
  author    = {Wendering, Philipp and Nikoloski, Zoran},
  title     = {{COMMIT}},
  journal   = {PLoS Computational Biology},
  volume    = {18},
  number    = {3},
  pagetotal = {24},
  year      = {2022},
  publisher = {Public Library of Science},
  address   = {San Francisco},
  issn      = {1553-734X},
  doi       = {10.1371/journal.pcbi.1009906},
  language  = {en},
  abstract  = {Composition and functions of microbial communities affect important traits in diverse hosts, from crops to humans. Yet, mechanistic understanding of how metabolism of individual microbes is affected by the community composition and metabolite leakage is lacking. Here, we first show that the consensus of automatically generated metabolic reconstructions improves the quality of the draft reconstructions, measured by comparison to reference models. We then devise an approach for gap filling, termed COMMIT, that considers metabolites for secretion based on their permeability and the composition of the community. By applying COMMIT with two soil communities from the Arabidopsis thaliana culture collection, we could significantly reduce the gap-filling solution in comparison to filling gaps in individual reconstructions without affecting the genomic support. Inspection of the metabolic interactions in the soil communities allows us to identify microbes with community roles of helpers and beneficiaries. Therefore, COMMIT offers a versatile fully automated solution for large-scale modelling of microbial communities for diverse biotechnological applications. Author summary: Microbial communities are important in ecology, human health, and crop productivity. However, detailed information on the interactions within natural microbial communities is hampered by the community size, lack of detailed information on the biochemistry of single organisms, and the complexity of interactions between community members. Metabolic models are comprised of biochemical reaction networks based on the genome annotation, and can provide mechanistic insights into community functions. Previous analyses of microbial community models have been performed with high-quality reference models or models generated using a single reconstruction pipeline. However, these models do not contain information on the composition of the community that determines the metabolites exchanged between the community members. In addition, the quality of metabolic models is affected by the reconstruction approach used, with direct consequences on the inferred interactions between community members. Here, we use fully automated consensus reconstructions from four approaches to arrive at functional models with improved genomic support while considering the community composition. We applied our pipeline to two soil communities from the Arabidopsis thaliana culture collection, providing only genome sequences. Finally, we show that the obtained models have 90\% genomic support and demonstrate that the derived interactions are corroborated by independent computational predictions.}
}
@article{KuekenGennermannNikoloski2020,
  author    = {K{\"u}ken, Anika and Gennermann, Kristin and Nikoloski, Zoran},
  title     = {Characterization of maximal enzyme catalytic rates in central metabolism of {Arabidopsis} thaliana},
  journal   = {The Plant Journal},
  volume    = {103},
  number    = {6},
  pages     = {2168--2177},
  year      = {2020},
  publisher = {Wiley},
  address   = {Oxford},
  issn      = {0960-7412},
  doi       = {10.1111/tpj.14890},
  language  = {en},
  abstract  = {Availability of plant-specific enzyme kinetic data is scarce, limiting the predictive power of metabolic models and precluding identification of genetic factors of enzyme properties. Enzyme kinetic data are measured in vitro, often under non-physiological conditions, and conclusions elicited from modeling warrant caution. Here we estimate maximal in vivo catalytic rates for 168 plant enzymes, including photosystems I and II, cytochrome-b6f complex, ATP-citrate synthase, sucrose-phosphate synthase as well as enzymes from amino acid synthesis with previously undocumented enzyme kinetic data in BRENDA. The estimations are obtained by integrating condition-specific quantitative proteomics data, maximal rates of selected enzymes, growth measurements from Arabidopsis thaliana rosette with and fluxes through canonical pathways in a constraint-based model of leaf metabolism. In comparison to findings in Escherichia coli, we demonstrate weaker concordance between the plant-specific in vitro and in vivo enzyme catalytic rates due to a low degree of enzyme saturation. This is supported by the finding that concentrations of nicotinamide adenine dinucleotide (phosphate), adenosine triphosphate and uridine triphosphate, calculated based on our maximal in vivo catalytic rates, and available quantitative metabolomics data are below reported $K_M$ values and, therefore, indicate undersaturation of respective enzymes. Our findings show that genome-wide profiling of enzyme kinetic properties is feasible in plants, paving the way for understanding resource allocation.}
}