% Conventions used below: one field per line; page ranges use an unspaced
% double hyphen; title words that must keep their capitalisation under
% sentence-casing styles are brace-protected; abstracts are kept verbatim.

% Review of machine learning approaches for genomic selection in crops.
% NOTE(review): pages holds a single number (13); this may be a page count
% from the export rather than a page or article number -- confirm.
@article{TongNikoloski2020,
  author    = {Tong, Hao and Nikoloski, Zoran},
  title     = {Machine learning approaches for crop improvement},
  journal   = {Journal of plant physiology : biochemistry, physiology, molecular biology and biotechnology of plants},
  volume    = {257},
  pages     = {13},
  year      = {2020},
  publisher = {Elsevier},
  address   = {M{\"u}nchen},
  issn      = {0176-1617},
  doi       = {10.1016/j.jplph.2020.153354},
  language  = {en},
  abstract  = {Highly efficient and accurate selection of elite genotypes can lead to dramatic shortening of the breeding cycle in major crops relevant for sustaining present demands for food, feed, and fuel. In contrast to classical approaches that emphasize the need for resource-intensive phenotyping at all stages of artificial selection, genomic selection dramatically reduces the need for phenotyping. Genomic selection relies on advances in machine learning and the availability of genotyping data to predict agronomically relevant phenotypic traits. Here we provide a systematic review of machine learning approaches applied for genomic selection of single and multiple traits in major crops in the past decade. We emphasize the need to gather data on intermediate phenotypes, e.g. metabolite, protein, and gene expression levels, along with developments of modeling techniques that can lead to further improvements of genomic selection. In addition, we provide a critical view of factors that affect genomic selection, with attention to transferability of models between different environments. Finally, we highlight the future aspects of integrating high-throughput molecular phenotypic data from omics technologies with biological networks for crop improvement.},
}

% Genomic selection applied to tomato and pepper fruit traits.
% NOTE(review): pages holds a single number (11); this may be a page count
% from the export rather than a page or article number -- confirm.
@article{TongNankarLiuetal.2022,
  author    = {Tong, Hao and
               Nankar, Amol N. and
               Liu, Jintao and
               Todorova, Velichka and
               Ganeva, Daniela and
               Grozeva, Stanislava and
               Tringovska, Ivanka and
               Pasev, Gancho and
               Radeva-Ivanova, Vesela and
               Gechev, Tsanko and
               Kostova, Dimitrina and
               Nikoloski, Zoran},
  title     = {Genomic prediction of morphometric and colorimetric traits in {Solanaceous} fruits},
  journal   = {Horticulture research},
  volume    = {9},
  pages     = {11},
  year      = {2022},
  publisher = {Oxford Univ. Press},
  address   = {Cary},
  issn      = {2052-7276},
  doi       = {10.1093/hr/uhac072},
  language  = {en},
  abstract  = {Selection of high-performance lines with respect to traits of interest is a key step in plant breeding. Genomic prediction allows to determine the genomic estimated breeding values of unseen lines for trait of interest using genetic markers, e.g. single-nucleotide polymorphisms (SNPs), and machine learning approaches, which can therefore shorten breeding cycles, referring to genomic selection (GS). Here, we applied GS approaches in two populations of Solanaceous crops, i.e. tomato and pepper, to predict morphometric and colorimetric traits. The traits were measured by using scoring-based conventional descriptors (CDs) as well as by Tomato Analyzer (TA) tool using the longitudinally and latitudinally cut fruit images. The GS performance was assessed in cross-validations of classification-based and regression-based machine learning models for CD and TA traits, respectively. The results showed the usage of TA traits and tag SNPs provide a powerful combination to predict morphology and color-related traits of Solanaceous fruits. The highest predictability of 0.89 was achieved for fruit width in pepper, with an average predictability of 0.69 over all traits. The multi-trait GS models are of slightly better predictability than single-trait models for some colorimetric traits in pepper. While model validation performs poorly on wild tomato accessions, the usage as many as one accession per wild species in the training set can increase the transferability of models to unseen populations for some traits (e.g. fruit shape for which predictability in unseen scenario increased from zero to 0.6). Overall, GS approaches can assist the selection of high-performance Solanaceous fruits in crop breeding.},
}

% Review of constraint-based approaches that integrate genetic variants
% into genome-scale metabolic models.
@article{TongKuekenRazaghiMoghadametal.2021,
  author    = {Tong, Hao and
               K{\"u}ken, Anika and
               Razaghi-Moghadam, Zahra and
               Nikoloski, Zoran},
  title     = {Characterization of effects of genetic variants via genome-scale metabolic modelling},
  journal   = {Cellular and molecular life sciences : CMLS},
  volume    = {78},
  number    = {12},
  pages     = {5123--5138},
  year      = {2021},
  publisher = {Springer International Publishing AG},
  address   = {Cham},
  issn      = {1420-682X},
  doi       = {10.1007/s00018-021-03844-4},
  language  = {en},
  abstract  = {Genome-scale metabolic networks for model plants and crops in combination with approaches from the constraint-based modelling framework have been used to predict metabolic traits and design metabolic engineering strategies for their manipulation. With the advances in technologies to generate large-scale genotyping data from natural diversity panels and other populations, genome-wide association and genomic selection have emerged as statistical approaches to determine genetic variants associated with and predictive of traits. Here, we review recent advances in constraint-based approaches that integrate genetic variants in genome-scale metabolic models to characterize their effects on reaction fluxes. Since some of these approaches have been applied in organisms other than plants, we provide a critical assessment of their applicability particularly in crops. In addition, we further dissect the inferred effects of genetic variants with respect to reaction rate constants, abundances of enzymes, and concentrations of metabolites, as main determinants of reaction fluxes and relate them with their combined effects on complex traits, like growth. Through this systematic review, we also provide a roadmap for future research to increase the predictive power of statistical approaches by coupling them with mechanistic models of metabolism.},
}

% Network-based genomic selection for growth of Arabidopsis thaliana.
% NOTE(review): pages holds a single number (9); this may be a page count
% from the export rather than a page or article number -- confirm.
@article{TongKuekenNikoloski2020,
  author    = {Tong, Hao and
               K{\"u}ken, Anika and
               Nikoloski, Zoran},
  title     = {Integrating molecular markers into metabolic models improves genomic selection for {Arabidopsis} growth},
  journal   = {Nature Communications},
  volume    = {11},
  number    = {1},
  pages     = {9},
  year      = {2020},
  publisher = {Nature Publishing Group UK},
  address   = {London},
  issn      = {2041-1723},
  doi       = {10.1038/s41467-020-16279-5},
  language  = {en},
  abstract  = {The current trends of crop yield improvements are not expected to meet the projected rise in demand. Genomic selection uses molecular markers and machine learning to identify superior genotypes with improved traits, such as growth. Plant growth directly depends on rates of metabolic reactions which transform nutrients into the building blocks of biomass. Here, we predict growth of Arabidopsis thaliana accessions by employing genomic prediction of reaction rates estimated from accession-specific metabolic models. We demonstrate that, comparing to classical genomic selection on the available data sets for 67 accessions, our approach improves the prediction accuracy for growth within and across nitrogen environments by 32.6\% and 51.4\%, respectively, and from optimal nitrogen to low carbon environment by 50.4\%. Therefore, integration of molecular markers into metabolic models offers an approach to predict traits directly related to metabolism, and its usefulness in breeding can be examined by gathering matching datasets in crops. An increase in genomic selection (GS) accuracy can accelerate genetic gain by shortening the breeding cycles. Here, the authors introduce a network-based GS method that uses metabolic models and improves the prediction accuracy of Arabidopsis growth within and across environments.},
}

% Diallel-cross study of metabolic and growth inheritance in Arabidopsis
% thaliana. The compound surname "de Abreu e Lima" is braced so that name
% parsing does not split it into a von part and a one-word last name.
@article{RodriguezCubillosTongAlseekhetal.2018,
  author    = {Rodriguez Cubillos, Andres Eduardo and
               Tong, Hao and
               Alseekh, Saleh and
               de {Abreu e Lima}, Francisco Anastacio and
               Yu, Jing and
               Fernie, Alisdair R. and
               Nikoloski, Zoran and
               Laitinen, Roosa A. E.},
  title     = {Inheritance patterns in metabolism and growth in diallel crosses of {Arabidopsis} thaliana from a single growth habitat},
  journal   = {Heredity},
  volume    = {120},
  number    = {5},
  pages     = {463--473},
  year      = {2018},
  publisher = {Nature Publ. Group},
  address   = {London},
  issn      = {0018-067X},
  doi       = {10.1038/s41437-017-0030-5},
  language  = {en},
  abstract  = {Metabolism is a key determinant of plant growth and modulates plant adaptive responses. Increased metabolic variation due to heterozygosity may be beneficial for highly homozygous plants if their progeny is to respond to sudden changes in the habitat. Here, we investigate the extent to which heterozygosity contributes to the variation in metabolism and size of hybrids of Arabidopsis thaliana whose parents are from a single growth habitat. We created full diallel crosses among seven parents, originating from Southern Germany, and analysed the inheritance patterns in primary and secondary metabolism as well as in rosette size in situ. In comparison to primary metabolites, compounds from secondary metabolism were more variable and showed more pronounced non-additive inheritance patterns which could be attributed to epistasis. In addition, we showed that glucosinolates, among other secondary metabolites, were positively correlated with a proxy for plant size. Therefore, our study demonstrates that heterozygosity in local A. thaliana population generates metabolic variation and may impact several tasks directly linked to metabolism.},
}

% Regularized multivariate regression model for multi-trait genomic selection.
% NOTE(review): "L-2,L-1" in the title and abstract looks like a flattened
% subscript (possibly L_{2,1}); the text is kept as found -- confirm against
% the publisher record before typesetting it as math.
@article{MbebiTongNikoloski2021,
  author    = {Mbebi, Alain J. and
               Tong, Hao and
               Nikoloski, Zoran},
  title     = {{L-2,L-1-norm} regularized multivariate regression model with applications to genomic prediction},
  journal   = {Bioinformatics},
  volume    = {37},
  number    = {18},
  pages     = {2896--2904},
  year      = {2021},
  publisher = {Oxford Univ. Press},
  address   = {Oxford},
  issn      = {1367-4803},
  doi       = {10.1093/bioinformatics/btab212},
  language  = {en},
  abstract  = {Motivation: Genomic selection (GS) is currently deemed the most effective approach to speed up breeding of agricultural varieties. It has been recognized that consideration of multiple traits in GS can improve accuracy of prediction for traits of low heritability. However, since GS forgoes statistical testing with the idea of improving predictions, it does not facilitate mechanistic understanding of the contribution of particular single nucleotide polymorphisms (SNP). Results: Here, we propose a L-2,L-1-norm regularized multivariate regression model and devise a fast and efficient iterative optimization algorithm, called L-2,L-1-joint, applicable in multi-trait GS. The usage of the L-2,L-1-norm facilitates variable selection in a penalized multivariate regression that considers the relation between individuals, when the number of SNPs is much larger than the number of individuals. The capacity for variable selection allows us to define master regulators that can be used in a multi-trait GS setting to dissect the genetic architecture of the analyzed traits. Our comparative analyses demonstrate that the proposed model is a favorable candidate compared to existing state-of-the-art approaches. Prediction and variable selection with datasets from Brassica napus, wheat and Arabidopsis thaliana diversity panels are conducted to further showcase the performance of the proposed model.},
}

% Comparison of genomic and phenomic prediction models in coffee hybrids.
% NOTE(review): pages holds a single number (11); this may be a page count
% from the export rather than a page or article number -- confirm.
@article{MbebiBreitlerBordeauxetal.2022,
  author    = {Mbebi, Alain J. and
               Breitler, Jean-Christophe and
               Bordeaux, M{\'e}lanie and
               Sulpice, Ronan and
               McHale, Marcus and
               Tong, Hao and
               Toniutti, Lucile and
               Castillo, Jonny Alonso and
               Bertrand, Benoit and
               Nikoloski, Zoran},
  title     = {A comparative analysis of genomic and phenomic predictions of growth-related traits in 3-way coffee hybrids},
  journal   = {G3: Genes, genomes, genetics},
  volume    = {12},
  number    = {9},
  pages     = {11},
  year      = {2022},
  publisher = {Genetics Soc. of America},
  address   = {Pittsburgh, PA},
  issn      = {2160-1836},
  doi       = {10.1093/g3journal/jkac170},
  language  = {en},
  abstract  = {Genomic prediction has revolutionized crop breeding despite remaining issues of transferability of models to unseen environmental conditions and environments. Usage of endophenotypes rather than genomic markers leads to the possibility of building phenomic prediction models that can account, in part, for this challenge. Here, we compare and contrast genomic prediction and phenomic prediction models for 3 growth-related traits, namely, leaf count, tree height, and trunk diameter, from 2 coffee 3-way hybrid populations exposed to a series of treatment-inducing environmental conditions. The models are based on 7 different statistical methods built with genomic markers and ChlF data used as predictors. This comparative analysis demonstrates that the best-performing phenomic prediction models show higher predictability than the best genomic prediction models for the considered traits and environments in the vast majority of comparisons within 3-way hybrid populations. In addition, we show that phenomic prediction models are transferrable between conditions but to a lower extent between populations and we conclude that chlorophyll a fluorescence data can serve as alternative predictors in statistical models of coffee hybrid performance. Future directions will explore their combination with other endophenotypes to further improve the prediction of growth-related traits for crops.},
}