Merge pull request #667 from MatthewJM96/qiime2_custom_db
Added QIIME2 custom reference database support.
d4straub authored Dec 19, 2023
2 parents 481f3f8 + 6b71e4d commit a86f9c7
Showing 20 changed files with 310 additions and 27 deletions.
1 change: 1 addition & 0 deletions .github/workflows/ci.yml
@@ -50,6 +50,7 @@ jobs:
- "test_failed"
- "test_multi"
- "test_reftaxcustom"
- "test_qiimecustom"
- "test_doubleprimers"
- "test_iontorrent"
- "test_novaseq"
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### `Added`

- [#667](https://github.com/nf-core/ampliseq/pull/667) - Added `--qiime_ref_tax_custom` to permit custom reference database for QIIME2 taxonomic classification
- [#674](https://github.com/nf-core/ampliseq/pull/674) - Add PhytoRef database for DADA2 taxonomy assignment using `--dada_ref_taxonomy phytoref`
- [#675](https://github.com/nf-core/ampliseq/pull/675) - Add the Zehr lab nifH database for DADA2 taxonomy assignment using `--dada_ref_taxonomy zehr-nifh`

12 changes: 9 additions & 3 deletions assets/report_template.Rmd
@@ -980,9 +980,15 @@ cat("\n\nDADA2 taxonomy assignments can be found in folder [dada2](../dada2) in
# Header
cat("## QIIME2\n")
cat("The taxonomic classification was performed by [QIIME2](https://www.nature.com/articles/s41587-019-0209-9)
using the database: `", params$qiime2_ref_tax_title, "`.
More details about the reference taxonomy database can be found in the ['Methods section'](#methods).\n\n", sep = "")
# indicate reference taxonomy
if ( !isFALSE(params$qiime2_ref_tax_title) ) {
cat("The taxonomic classification was performed by [QIIME2](https://www.nature.com/articles/s41587-019-0209-9)
using the database: `", params$qiime2_ref_tax_title, "`.
More details about the reference taxonomy database can be found in the ['Methods section'](#methods).\n\n", sep = "")
} else {
cat("The taxonomic classification was performed by [QIIME2](https://www.nature.com/articles/s41587-019-0209-9) using a custom database ",
"provided by the user.\n\n", sep = "")
}
# Read file and prepare table
asv_tax <- read.table(params$qiime2_taxonomy, header = TRUE, sep = "\t")
33 changes: 33 additions & 0 deletions conf/test_qiimecustom.config
@@ -0,0 +1,33 @@
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    Nextflow config file for running minimal tests
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    Defines input files and everything required to run a fast and simple pipeline test.

    Use as follows:
        nextflow run nf-core/ampliseq -profile test_qiimecustom,<docker/singularity> --outdir <OUTDIR>

----------------------------------------------------------------------------------------
*/

params {
    config_profile_name        = 'Test custom QIIME2 reference taxonomy database profile'
    config_profile_description = 'Minimal test dataset to check --qiime_ref_tax_custom'

    // Limit resources so that this can run on GitHub Actions
    max_cpus   = 2
    max_memory = '6.GB'
    max_time   = '6.h'

    // Input data
    FW_primer = "GTGYCAGCMGCCGCGGTAA"
    RV_primer = "GGACTACNVGGGTWTCTAAT"
    input     = "https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/samplesheets/Samplesheet.tsv"

    // Custom reference taxonomy
    qiime_ref_tax_custom = "https://raw.githubusercontent.com/MatthewJM96/test-datasets/ampliseq/testdata/db/85_greengenes.fna.gz,https://raw.githubusercontent.com/MatthewJM96/test-datasets/ampliseq/testdata/db/85_greengenes.tax.gz"

    // Skip downstream analysis with QIIME2
    skip_qiime_downstream = true
    skip_dada_taxonomy    = true
}
3 changes: 2 additions & 1 deletion conf/test_reftaxcustom.config
@@ -30,7 +30,8 @@ params {
dada_assign_taxlevels = "Kingdom,Phylum,Class,Order,Family,Genus"
kraken2_ref_tax_custom = "https://genome-idx.s3.amazonaws.com/kraken/16S_Greengenes13.5_20200326.tgz"
kraken2_assign_taxlevels = "D,P,C,O"
qiime_ref_tax_custom = "https://raw.githubusercontent.com/MatthewJM96/test-datasets/ampliseq/testdata/db/85_greengenes.tar.gz"

// Skip downstream analysis with QIIME2
skip_qiime = true
skip_qiime_downstream = true
}
8 changes: 4 additions & 4 deletions lib/WorkflowAmpliseq.groovy
@@ -77,21 +77,21 @@ class WorkflowAmpliseq {
}

if (params.skip_dada_taxonomy && params.sbdiexport) {
if (!params.sintax_ref_taxonomy && (params.skip_qiime || !params.qiime_ref_taxonomy)) {
if (!params.sintax_ref_taxonomy && (params.skip_qiime || (!params.qiime_ref_taxonomy && !params.qiime_ref_tax_custom))) {
Nextflow.error("Incompatible parameters: `--sbdiexport` expects taxa annotation and therefore annotation with either DADA2, SINTAX, or QIIME2 is needed.")
}
}

if ( (!params.FW_primer || !params.RV_primer) && params.qiime_ref_taxonomy && !params.skip_qiime && !params.skip_taxonomy ) {
if ( (!params.FW_primer || !params.RV_primer) && (params.qiime_ref_taxonomy || params.qiime_ref_tax_custom) && !params.skip_qiime && !params.skip_taxonomy ) {
Nextflow.error("Incompatible parameters: `--FW_primer` and `--RV_primer` are required for cutting the QIIME2 reference database to the amplicon sequences. Please specify primers or do not use `--qiime_ref_taxonomy`.")
}

if ( (!params.FW_primer || !params.RV_primer) && params.cut_dada_ref_taxonomy && !params.skip_taxonomy ) {
Nextflow.error("Incompatible parameters: `--FW_primer` and `--RV_primer` are required for cutting the DADA2 reference database to the amplicon sequences. Please specify primers or do not use `--cut_dada_ref_taxonomy`.")
}

if (params.qiime_ref_taxonomy && params.classifier) {
Nextflow.error("Incompatible parameters: `--qiime_ref_taxonomy` will produce a classifier but `--classifier` points to a precomputed classifier, therefore, only use one of those.")
if ((params.qiime_ref_taxonomy || params.qiime_ref_tax_custom) && params.classifier) {
Nextflow.error("Incompatible parameters: `--qiime_ref_taxonomy` and `--qiime_ref_tax_custom` will produce a classifier but `--classifier` points to a precomputed classifier, therefore, only use one of those.")
}

if (params.kraken2_ref_tax_custom && !params.kraken2_assign_taxlevels ) {
2 changes: 1 addition & 1 deletion lib/WorkflowMain.groovy
@@ -34,7 +34,7 @@ class WorkflowMain {
if (params.sintax_ref_taxonomy && !params.skip_taxonomy) {
sintaxreftaxonomyExistsError(params, log)
}
if (params.qiime_ref_taxonomy && !params.skip_taxonomy && !params.classifier) {
if ((params.qiime_ref_taxonomy || params.qiime_ref_tax_custom) && !params.skip_taxonomy && !params.classifier) {
qiimereftaxonomyExistsError(params, log)
}

32 changes: 32 additions & 0 deletions modules/local/gzip_decompress.nf
@@ -0,0 +1,32 @@
process GZIP_DECOMPRESS {
    tag "$file"
    label 'process_single'

    conda "conda-forge::sed=4.7 conda-forge::gzip=1.13"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/ubuntu:20.04' :
        'nf-core/ubuntu:20.04' }"

    input:
    path(file)

    output:
    path("$outfile"), emit: ungzip
    path "versions.yml", emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    outfile = task.ext.outfile ?: file.baseName.toString().replaceFirst(/\.gz$/, "")

    """
    gzip $args -c -d $file > $outfile
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        gzip: \$(echo \$(gzip --version 2>&1) | sed 's/gzip //; s/ Copyright.*\$//')
    END_VERSIONS
    """
}
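
The module above simply wraps `gzip -c -d`. For context, the following is a minimal, hypothetical sketch of how it can be wired into a workflow, mirroring its use in `subworkflows/local/qiime2_preptax.nf` further down in this diff. The workflow name, channel name, example file name, and include path are illustrative and not part of the pipeline.

```nextflow
// Illustrative wiring only; adjust the include path to wherever this snippet lives.
include { GZIP_DECOMPRESS } from '../modules/local/gzip_decompress.nf'

workflow TEST_GZIP_DECOMPRESS {
    // Any gzipped reference file, e.g. a *.fna.gz sequence file
    ch_compressed = Channel.fromPath('85_greengenes.fna.gz')

    GZIP_DECOMPRESS ( ch_compressed )

    GZIP_DECOMPRESS.out.ungzip.view()     // decompressed file, e.g. 85_greengenes.fna
    GZIP_DECOMPRESS.out.versions.view()   // versions.yml for the software versions report
}
```
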
3 changes: 2 additions & 1 deletion modules/local/summary_report.nf
@@ -118,7 +118,8 @@ process SUMMARY_REPORT {
kraken2_tax ? "kraken2_taxonomy='$kraken2_tax',kraken2_confidence='$params.kraken2_confidence'" : "",
kraken2_tax && !params.kraken2_ref_tax_custom ? "kraken2_ref_tax_title='${params.kraken2_ref_databases[params.kraken2_ref_taxonomy]["title"]}',kraken2_ref_tax_file='${params.kraken2_ref_databases[params.kraken2_ref_taxonomy]["file"]}',kraken2_ref_tax_citation='${params.kraken2_ref_databases[params.kraken2_ref_taxonomy]["citation"]}'" : "",
pplace_tax ? "pplace_taxonomy='$pplace_tax',pplace_heattree='$pplace_heattree'" : "",
qiime2_tax && params.qiime_ref_taxonomy ? "qiime2_taxonomy='$qiime2_tax',qiime2_ref_tax_title='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["title"]}',qiime2_ref_tax_file='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["file"]}',qiime2_ref_tax_citation='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["citation"]}'" : qiime2_tax ? "qiime2_taxonomy='$qiime2_tax'" : "",
qiime2_tax ? "qiime2_taxonomy='$qiime2_tax'" : "",
qiime2_tax && params.qiime_ref_taxonomy ? "qiime2_ref_tax_title='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["title"]}',qiime2_ref_tax_file='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["file"]}',qiime2_ref_tax_citation='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["citation"]}'" : "",
run_qiime2 ? "val_used_taxonomy='$val_used_taxonomy'" : "",
filter_stats_tsv ? "filter_stats_tsv='$filter_stats_tsv',qiime2_filtertaxa='$qiime2_filtertaxa',exclude_taxa='$params.exclude_taxa',min_frequency='$params.min_frequency',min_samples='$params.min_samples'" : "",
barplot ? "barplot=TRUE" : "",
3 changes: 3 additions & 0 deletions nextflow.config
@@ -87,6 +87,7 @@ params {
skip_dada_quality = false
skip_barrnap = false
skip_qiime = false
skip_qiime_downstream = false
skip_fastqc = false
skip_alpha_rarefaction = false
skip_abundance_tables = false
@@ -108,6 +109,7 @@ params {
cut_dada_ref_taxonomy = false
sintax_ref_taxonomy = null
qiime_ref_taxonomy = null
qiime_ref_tax_custom = null
kraken2_ref_taxonomy = null
kraken2_assign_taxlevels = null
kraken2_ref_tax_custom = null
@@ -272,6 +274,7 @@ profiles {
test_failed { includeConfig 'conf/test_failed.config' }
test_full { includeConfig 'conf/test_full.config' }
test_reftaxcustom { includeConfig 'conf/test_reftaxcustom.config' }
test_qiimecustom { includeConfig 'conf/test_qiimecustom.config' }
test_novaseq { includeConfig 'conf/test_novaseq.config' }
test_pplace { includeConfig 'conf/test_pplace.config' }
test_sintax { includeConfig 'conf/test_sintax.config' }
9 changes: 9 additions & 0 deletions nextflow_schema.json
@@ -435,6 +435,11 @@
"greengenes85"
]
},
"qiime_ref_tax_custom": {
"type": "string",
"help_text": "Is preferred over `--qiime_ref_taxonomy`. A comma separated pair of (possibly gzipped) filepaths (sequence, taxonomy).",
"description": "Path to files of a custom QIIME2 reference taxonomy database (files may be gzipped)"
},
"classifier": {
"type": "string",
"description": "Path to QIIME2 trained classifier file (typically *-classifier.qza)",
@@ -654,6 +659,10 @@
"type": "boolean",
"description": "Skip all steps that are executed by QIIME2, including QIIME2 software download, taxonomy assignment by QIIME2, barplots, relative abundance tables, diversity analysis, differential abundance testing."
},
"skip_qiime_downstream": {
"type": "boolean",
"description": "Skip steps that are executed by QIIME2 except for taxonomic classification. Skip steps including barplots, relative abundance tables, diversity analysis, differential abundance testing."
},
"skip_taxonomy": {
"type": "boolean",
"description": "Skip taxonomic classification. Incompatible with `--sbdiexport`"
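
To illustrate the two parameters added to the schema above, here is a hedged sketch of a user config that combines them, based on `conf/test_qiimecustom.config` (comma-separated file pair) and `conf/test_reftaxcustom.config` (single archive); all file paths are placeholders.

```nextflow
params {
    // Comma-separated pair of (optionally gzipped) files: sequence first, taxonomy second
    qiime_ref_tax_custom  = "/path/to/ref_sequences.fna.gz,/path/to/ref_taxonomy.tax.gz"

    // Alternatively, a single tarball or directory containing *.fna and *.tax files,
    // as used in conf/test_reftaxcustom.config:
    // qiime_ref_tax_custom = "/path/to/85_greengenes.tar.gz"

    // Keep QIIME2 taxonomic classification but skip barplots, diversity analysis, etc.
    skip_qiime_downstream = true

    // Primers are still required so the reference can be trimmed to the amplicon region
    // (see the check in lib/WorkflowAmpliseq.groovy above)
    FW_primer = "GTGYCAGCMGCCGCGGTAA"
    RV_primer = "GGACTACNVGGGTWTCTAAT"
}
```
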
84 changes: 80 additions & 4 deletions subworkflows/local/qiime2_preptax.nf
@@ -2,20 +2,91 @@
* Training of a classifier with QIIME2
*/

include { UNTAR } from '../../modules/nf-core/untar/main'
include { GZIP_DECOMPRESS } from '../../modules/local/gzip_decompress.nf'
include { FORMAT_TAXONOMY_QIIME } from '../../modules/local/format_taxonomy_qiime'
include { QIIME2_EXTRACT } from '../../modules/local/qiime2_extract'
include { QIIME2_TRAIN } from '../../modules/local/qiime2_train'

workflow QIIME2_PREPTAX {
take:
ch_qiime_ref_taxonomy //channel, list of files
val_qiime_ref_taxonomy //val
FW_primer //val
RV_primer //val

main:
FORMAT_TAXONOMY_QIIME ( ch_qiime_ref_taxonomy )
ch_qiime2_preptax_versions = Channel.empty()

if (params.qiime_ref_tax_custom) {
// Handle case where we have been provided a pair of filepaths.
if ("${params.qiime_ref_tax_custom}".contains(",")) {
ch_qiime_ref_taxonomy.flatten()
.branch {
compressed: it.isFile() && it.getName().endsWith(".gz")
decompressed: it.isFile() && ( it.getName().endsWith(".fna") || it.getName().endsWith(".tax") )
failed: true
}.set { ch_qiime_ref_tax_branched }
ch_qiime_ref_tax_branched.failed.subscribe { error "$it is neither a compressed (ends with `.gz`) or decompressed sequence (ends with `.fna`) or taxonomy file (ends with `.tax`). Please review input." }

GZIP_DECOMPRESS(ch_qiime_ref_tax_branched.compressed)
ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(GZIP_DECOMPRESS.out.versions)

ch_qiime_db_files = GZIP_DECOMPRESS.out.ungzip
ch_qiime_db_files = ch_qiime_db_files.mix(ch_qiime_ref_tax_branched.decompressed)

ch_ref_database_fna = ch_qiime_db_files.filter {
it.getName().endsWith(".fna")
}
ch_ref_database_tax = ch_qiime_db_files.filter {
it.getName().endsWith(".tax")
}

ch_ref_database = ch_ref_database_fna.combine(ch_ref_database_tax)
// Handle case we have been provided a single filepath (tarball or directory).
} else {
ch_qiime_ref_taxonomy.flatten()
.branch {
tar: it.isFile() && ( it.getName().endsWith(".tar.gz") || it.getName().endsWith (".tgz") )
dir: it.isDirectory()
failed: true
}.set { ch_qiime_ref_tax_branched }
ch_qiime_ref_tax_branched.failed.subscribe { error "$it is neither a directory nor a file that ends in '.tar.gz' or '.tgz'. Please review input." }

UNTAR (
ch_qiime_ref_tax_branched.tar
.map {
db ->
def meta = [:]
meta.id = val_qiime_ref_taxonomy
[ meta, db ] } )
ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(UNTAR.out.versions)

ch_qiime_db_dir = UNTAR.out.untar.map{ it[1] }
ch_qiime_db_dir = ch_qiime_db_dir.mix(ch_qiime_ref_tax_branched.dir)

ch_ref_database_fna = ch_qiime_db_dir.map{ dir ->
files = file(dir.resolve("*.fna"), checkIfExists: true)
} | filter {
if (it.size() > 1) log.warn "Found multiple fasta files for QIIME2 reference database."
it.size() == 1
}
ch_ref_database_tax = ch_qiime_db_dir.map{ dir ->
files = file(dir.resolve("*.tax"), checkIfExists: true)
} | filter {
if (it.size() > 1) log.warn "Found multiple tax files for QIIME2 reference database."
it.size() == 1
}

ch_ref_database = ch_ref_database_fna.combine(ch_ref_database_tax)
}
} else {
FORMAT_TAXONOMY_QIIME ( ch_qiime_ref_taxonomy )
ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(FORMAT_TAXONOMY_QIIME.out.versions)

ch_ref_database = FORMAT_TAXONOMY_QIIME.out.fasta.combine(FORMAT_TAXONOMY_QIIME.out.tax)
}

ch_ref_database = FORMAT_TAXONOMY_QIIME.out.fasta.combine(FORMAT_TAXONOMY_QIIME.out.tax)
ch_ref_database
.map {
db ->
@@ -24,10 +95,15 @@ workflow QIIME2_PREPTAX {
meta.RV_primer = RV_primer
[ meta, db ] }
.set { ch_ref_database }

QIIME2_EXTRACT ( ch_ref_database )
ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(QIIME2_EXTRACT.out.versions)

QIIME2_TRAIN ( QIIME2_EXTRACT.out.qza )
ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(QIIME2_TRAIN.out.versions)

emit:
classifier = QIIME2_TRAIN.out.qza
versions = QIIME2_TRAIN.out.versions
classifier = QIIME2_TRAIN.out.qza
versions = ch_qiime2_preptax_versions
}
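
The call site for this subworkflow lives in `workflows/ampliseq.nf` and is not part of this diff, so the following is only a hypothetical sketch of how a caller might build the reference channel from `--qiime_ref_tax_custom` (either a single tarball/directory or a comma-separated file pair) and invoke the subworkflow. The channel construction, the include path, and the placeholder value `"user"` are assumptions, not the pipeline's actual wiring.

```nextflow
// Hypothetical caller-side wiring; the real call is made from workflows/ampliseq.nf.
include { QIIME2_PREPTAX } from '../subworkflows/local/qiime2_preptax.nf'

workflow {
    // Splitting on ',' means both accepted forms yield a list of one or two paths
    ch_qiime_ref_taxonomy = Channel.fromPath(
        params.qiime_ref_tax_custom.toString().split(',') as List,
        checkIfExists: true
    ).collect()

    QIIME2_PREPTAX (
        ch_qiime_ref_taxonomy,
        "user",               // val_qiime_ref_taxonomy: placeholder id, used as meta.id for UNTAR
        params.FW_primer,
        params.RV_primer
    )

    QIIME2_PREPTAX.out.classifier.view()   // trained QIIME2 classifier (.qza)
}
```
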

2 changes: 1 addition & 1 deletion tests/pipeline/doubleprimers.nf.test.snap


2 changes: 1 addition & 1 deletion tests/pipeline/multi.nf.test.snap
