% GRIMER journal article (GigaScience, volume 12, 2023).
% The acronym in the title is brace-protected so sentence-casing styles keep
% it upper-case. The journal name is stored only in the journal field.
% NOTE(review): pages = 13 may be a page count rather than a page range;
% confirm against the publisher record.
@article{PiroRenard2023,
  author    = {Piro, Vitor C. and Renard, Bernhard Y.},
  title     = {Contamination detection and microbiome exploration with {GRIMER}},
  journal   = {GigaScience},
  volume    = {12},
  pages     = {13},
  year      = {2023},
  publisher = {Oxford Univ. Press},
  address   = {Oxford},
  issn      = {2047-217X},
  doi       = {10.1093/gigascience/giad017},
  language  = {en},
  abstract  = {Background: Contamination detection is a important step that should be carefully considered in early stages when designing and performing microbiome studies to avoid biased outcomes. Detecting and removing true contaminants is challenging, especially in low-biomass samples or in studies lacking proper controls. Interactive visualizations and analysis platforms are crucial to better guide this step, to help to identify and detect noisy patterns that could potentially be contamination. Additionally, external evidence, like aggregation of several contamination detection methods and the use of common contaminants reported in the literature, could help to discover and mitigate contamination. Results: We propose GRIMER, a tool that performs automated analyses and generates a portable and interactive dashboard integrating annotation, taxonomy, and metadata. It unifies several sources of evidence to help detect contamination. GRIMER is independent of quantification methods and directly analyzes contingency tables to create an interactive and offline report. Reports can be created in seconds and are accessible for nonspecialists, providing an intuitive set of charts to explore data distribution among observations and samples and its connections with external sources. Further, we compiled and used an extensive list of possible external contaminant taxa and common contaminants with 210 genera and 627 species reported in 22 published articles. Conclusion: GRIMER enables visual data exploration and analysis, supporting contamination detection in microbiome studies.
The tool and data presented are open source and available at https://gitlab.com/dacs-hpi/grimer.},
}

% ganon journal article (Bioinformatics, volume 36, 2020).
% The doi field holds the bare identifier (no resolver prefix) and the page
% range uses a double hyphen without surrounding spaces.
% NOTE(review): the title looks truncated to the tool name, and the page range
% may be missing a supplement prefix; confirm against the DOI landing page.
@article{PiroDadiSeileretal.2020,
  author    = {Piro, Vitor C. and Dadi, Temesgen H. and Seiler, Enrico and Reinert, Knut and Renard, Bernhard Y.},
  title     = {ganon},
  journal   = {Bioinformatics},
  volume    = {36},
  pages     = {12--20},
  year      = {2020},
  publisher = {Oxford Univ. Press},
  address   = {Oxford},
  issn      = {1367-4811},
  doi       = {10.1093/bioinformatics/btaa458},
  language  = {en},
  abstract  = {Motivation: The exponential growth of assembled genome sequences greatly benefits metagenomics studies. However, currently available methods struggle to manage the increasing amount of sequences and their frequent updates. Indexing the current RefSeq can take days and hundreds of GB of memory on large servers. Few methods address these issues thus far, and even though many can theoretically handle large amounts of references, time/memory requirements are prohibitive in practice. As a result, many studies that require sequence classification use often outdated and almost never truly up-to-date indices. Results: Motivated by those limitations, we created ganon, a k-mer-based read classification tool that uses Interleaved Bloom Filters in conjunction with a taxonomic clustering and a k-mer counting/filtering scheme. Ganon provides an efficient method for indexing references, keeping them updated. It requires <55 min to index the complete RefSeq of bacteria, archaea, fungi and viruses. The tool can further keep these indices up-to-date in a fraction of the time necessary to create them. Ganon makes it possible to query against very large reference sets and therefore it classifies significantly more reads and identifies more species than similar methods.
When classifying a high-complexity CAMI challenge dataset against complete genomes from RefSeq, ganon shows strongly increased precision with equal or better sensitivity compared with state-of-the-art tools. With the same dataset against the complete RefSeq, ganon improved the F1-score by 65\% at the genus level. It supports taxonomy- and assembly-level classification, multiple indices and hierarchical classification.},
}