diff --git a/.editorconfig b/.editorconfig index 5f42de01..72dda289 100644 --- a/.editorconfig +++ b/.editorconfig @@ -28,6 +28,6 @@ indent_style = unset [/assets/email*] indent_size = unset -# ignore python +# ignore python and markdown [*.{py,md}] indent_style = unset diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index f730bdaa..ada206f7 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -18,7 +18,7 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/ampl - [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/ampliseq/tree/master/.github/CONTRIBUTING.md) - [ ] If necessary, also make a PR on the nf-core/ampliseq _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. - [ ] Make sure your code lints (`nf-core lint`). -- [ ] Ensure the test suite passes (`nf-test test main.nf.test -profile test,docker`). +- [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir `). - [ ] Check for unexpected warnings in debug mode (`nextflow run . -profile debug,test,docker --outdir `). - [ ] Usage Documentation in `docs/usage.md` is updated. - [ ] Output Documentation in `docs/output.md` is updated. 
diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index da5094bd..8fa004b9 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -8,12 +8,12 @@ on: types: [published] workflow_dispatch: jobs: - run-tower: + run-platform: name: Run AWS full tests if: github.repository == 'nf-core/ampliseq' runs-on: ubuntu-latest steps: - - name: Launch workflow via tower + - name: Launch workflow via Seqera Platform uses: seqeralabs/action-tower-launch@v2 with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} @@ -30,7 +30,7 @@ jobs: - uses: actions/upload-artifact@v4 with: - name: Tower debug log file + name: Seqera Platform debug log file path: | - tower_action_*.log - tower_action_*.json + seqera_platform_action_*.log + seqera_platform_action_*.json diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index 0b3d57fb..ac8d0791 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -5,13 +5,13 @@ name: nf-core AWS test on: workflow_dispatch: jobs: - run-tower: + run-platform: name: Run AWS tests if: github.repository == 'nf-core/ampliseq' runs-on: ubuntu-latest steps: - # Launch workflow using Tower CLI tool action - - name: Launch workflow via tower + # Launch workflow using Seqera Platform CLI tool action + - name: Launch workflow via Seqera Platform uses: seqeralabs/action-tower-launch@v2 with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} @@ -27,7 +27,7 @@ jobs: - uses: actions/upload-artifact@v4 with: - name: Tower debug log file + name: Seqera Platform debug log file path: | - tower_action_*.log - tower_action_*.json + seqera_platform_action_*.log + seqera_platform_action_*.json diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 513b282e..b6cca8d1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -63,27 +63,10 @@ jobs: steps: - name: Check out pipeline code - uses: 
actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 - - - name: Check out test data - uses: actions/checkout@v3 - with: - repository: nf-core/test-datasets - ref: ampliseq - path: test-datasets/ - fetch-depth: 1 - - - name: Replace remote paths in samplesheets - run: | - for f in ${{ github.workspace }}/test-datasets/samplesheets/*.tsv; do - sed -i "s=https://github.com/nf-core/test-datasets/raw/ampliseq/testdata/=${{ github.workspace }}/test-datasets/=g" $f - echo "========== $f ============" - cat $f - echo "========================================" - done; + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 - name: Install Nextflow - uses: nf-core/setup-nextflow@v1 + uses: nf-core/setup-nextflow@v2 with: version: "${{ matrix.NXF_VER }}" diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml index ad2f1283..2d20d644 100644 --- a/.github/workflows/download_pipeline.yml +++ b/.github/workflows/download_pipeline.yml @@ -32,9 +32,12 @@ jobs: - name: Install Nextflow uses: nf-core/setup-nextflow@v2 - - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 + - name: Disk space cleanup + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 + + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 with: - python-version: "3.11" + python-version: "3.12" architecture: "x64" - uses: eWaterCycle/setup-singularity@931d4e31109e875b13309ae1d07c70ca8fbc8537 # v7 with: @@ -73,11 +76,11 @@ jobs: env: NXF_SINGULARITY_CACHEDIR: ./ NXF_SINGULARITY_HOME_MOUNT: true - run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -stub -profile test,singularity --skip_qiime --outdir ./results + run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -stub -profile test,singularity --outdir ./results - name: Run the downloaded pipeline (stub run not supported) id: run_pipeline 
if: ${{ job.steps.stub_run_pipeline.status == failure() }} env: NXF_SINGULARITY_CACHEDIR: ./ NXF_SINGULARITY_HOME_MOUNT: true - run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -profile test,singularity --skip_qiime --outdir ./results + run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -profile test,singularity --outdir ./results diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix-linting.yml index a3a31ac9..a02a090d 100644 --- a/.github/workflows/fix-linting.yml +++ b/.github/workflows/fix-linting.yml @@ -13,7 +13,7 @@ jobs: runs-on: ubuntu-latest steps: # Use the @nf-core-bot token to check out so we can push later - - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 with: token: ${{ secrets.nf_core_bot_auth_token }} @@ -32,9 +32,9 @@ jobs: GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }} # Install and run pre-commit - - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 with: - python-version: 3.11 + python-version: "3.12" - name: Install pre-commit run: pip install pre-commit diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 073e1876..1fcafe88 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -14,13 +14,12 @@ jobs: pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 - - name: Set up Python 3.11 - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 + - name: Set up Python 3.12 + uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 with: - python-version: 3.11 - cache: "pip" + python-version: "3.12" - name: Install 
pre-commit run: pip install pre-commit @@ -32,14 +31,14 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out pipeline code - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 - name: Install Nextflow - uses: nf-core/setup-nextflow@v1 + uses: nf-core/setup-nextflow@v2 - - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 with: - python-version: "3.11" + python-version: "3.12" architecture: "x64" - name: Install dependencies @@ -60,7 +59,7 @@ jobs: - name: Upload linting log file artifact if: ${{ always() }} - uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4 + uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4 with: name: linting-logs path: | diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index b706875f..40acc23f 100644 --- a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Download lint results - uses: dawidd6/action-download-artifact@f6b0bace624032e30a85a8fd9c1a7f8f611f5737 # v3 + uses: dawidd6/action-download-artifact@09f2f74827fd3a8607589e5ad7f9398816f540fe # v3 with: workflow: linting.yml workflow_conclusion: completed diff --git a/.github/workflows/release-announcements.yml b/.github/workflows/release-announcements.yml index 5ada136c..03ecfcf7 100644 --- a/.github/workflows/release-announcements.yml +++ b/.github/workflows/release-announcements.yml @@ -31,7 +31,7 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 with: python-version: "3.10" - name: Install dependencies diff --git a/.nf-core.yml b/.nf-core.yml index 9c60be60..d9f93009 100644 
--- a/.nf-core.yml +++ b/.nf-core.yml @@ -14,3 +14,4 @@ update: https://github.com/nf-core/modules.git: nf-core: mafft: "feb29be775d9e41750180539e9a3bdce801d0609" +nf_core_version: "2.14.1" diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index af57081f..4dc0f1dc 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,6 +3,9 @@ repos: rev: "v3.1.0" hooks: - id: prettier + additional_dependencies: + - prettier@3.2.5 + - repo: https://github.com/editorconfig-checker/editorconfig-checker.python rev: "2.7.3" hooks: diff --git a/CHANGELOG.md b/CHANGELOG.md index 53df95e7..b938f472 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,31 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## nf-core/ampliseq version 2.10.0 - 2024-06-27 + +### `Added` + +- [#751](https://github.com/nf-core/ampliseq/pull/751) - Added version R08-RS214 of curated GTDB 16S taxonomy: `sbdi-gtdb=R08-RS214-1` or `sbdi-gtdb` as parameter to `--dada_ref_taxonomy` +- [#752](https://github.com/nf-core/ampliseq/pull/752) - Added version R09-RS220 of GTDB 16S taxonomy: `gtdb=R09-RS220` or `gtdb` as parameter to `--dada_ref_taxonomy` +- [#753](https://github.com/nf-core/ampliseq/pull/753), [#756](https://github.com/nf-core/ampliseq/pull/756), [#757](https://github.com/nf-core/ampliseq/pull/757) - ANCOM-BC via QIIME2 can be used with `--ancombc`, `--ancombc_formula`, and `--ancombc_formula_reflvl`, plotting can be modified with thresholds `--ancombc_effect_size` and `--ancombc_significance` + +### `Changed` + +- [#749](https://github.com/nf-core/ampliseq/pull/749) - Create barplot also when no metadata is given +- [#753](https://github.com/nf-core/ampliseq/pull/753) - ANCOM via QIIME2 is not run anymore by default but on request with `--ancom`, therefore `--skip_ancom` was removed + +### `Fixed` + +- 
[#747](https://github.com/nf-core/ampliseq/pull/747) - Template update for nf-core/tools version 2.14.1 +- [#748](https://github.com/nf-core/ampliseq/pull/748) - Updating misleading error message and documentation +- [#750](https://github.com/nf-core/ampliseq/pull/750) - Numbers in `overall_summary.tsv` were fixed (sometimes misleading in 2.9.0 for columns "denoised[F/R]", "merged", and "nochim") + +### `Dependencies` + +### `Removed` + +- [#753](https://github.com/nf-core/ampliseq/pull/753) - `--skip_ancom` was removed + ## nf-core/ampliseq version 2.9.0 - 2024-04-03 ### `Added` diff --git a/CITATIONS.md b/CITATIONS.md index 54dd5667..fa98ef19 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -135,6 +135,10 @@ > Mandal S, Van Treuren W, White RA, Eggesbø M, Knight R, Peddada SD. Analysis of composition of microbiomes: a novel method for studying microbial composition. Microb Ecol Health Dis. 2015 May 29;26:27663. doi: 10.3402/mehd.v26.27663. PMID: 26028277; PMCID: PMC4450248. +- [ANCOM-BC](https://pubmed.ncbi.nlm.nih.gov/32665548/) + + > Lin H, Peddada SD. Analysis of compositions of microbiomes with bias correction. Nat Commun. 2020 Jul 14;11(1):3514. doi: 10.1038/s41467-020-17041-7. PMID: 32665548; PMCID: PMC7360769. + - [Adonis](https://doi.org/10.1111/j.1442-9993.2001.01070.pp.x) and [VEGAN](https://CRAN.R-project.org/package=vegan) > Marti J Anderson. A new method for non-parametric multivariate analysis of variance. Austral ecology, 26(1):32–46, 2001. 
diff --git a/README.md b/README.md index b250eb19..beddf94c 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) -[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/nf-core/ampliseq) +[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/ampliseq) [![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23ampliseq-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/ampliseq)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)[![Watch on YouTube](http://img.shields.io/badge/youtube-ampliseq-FFFF00?labelColor=000000&logo=youtube)](https://youtu.be/a0VOEeAvETs) @@ -44,7 +44,6 @@ By default, the pipeline currently performs the following: - Phylogenetic placement ([EPA-NG](https://github.com/Pbdas/epa-ng)) - Taxonomical classification using DADA2; alternatives are [SINTAX](https://doi.org/10.1101/074161), [Kraken2](https://doi.org/10.1186/s13059-019-1891-0), and [QIIME2](https://www.nature.com/articles/s41587-019-0209-9) - Excludes unwanted taxa, 
produces absolute and relative feature/taxa count tables and plots, plots alpha rarefaction curves, computes alpha and beta diversity indices and plots thereof ([QIIME2](https://www.nature.com/articles/s41587-019-0209-9)) -- Calls differentially abundant taxa ([ANCOM](https://www.ncbi.nlm.nih.gov/pubmed/26028277)) - Creates phyloseq R objects ([Phyloseq](https://www.bioconductor.org/packages/release/bioc/html/phyloseq.html)) - Pipeline QC summaries ([MultiQC](https://multiqc.info/)) - Pipeline summary report ([R Markdown](https://github.com/rstudio/rmarkdown)) @@ -73,11 +72,10 @@ nextflow run nf-core/ampliseq \ > Adding metadata will considerably increase the output, see [metadata documentation](https://nf-co.re/ampliseq/usage#metadata). > [!TIP] -> By default the taxonomic assignment will be performed with DADA2 on SILVA database, but there are various tools and databases readily available, see [taxonomic classification documentation](https://nf-co.re/ampliseq/usage#taxonomic-classification). +> By default the taxonomic assignment will be performed with DADA2 on SILVA database, but there are various tools and databases readily available, see [taxonomic classification documentation](https://nf-co.re/ampliseq/usage#taxonomic-classification). Differential abundance testing with [ANCOM](https://www.ncbi.nlm.nih.gov/pubmed/26028277) or [ANCOM-BC](https://www.ncbi.nlm.nih.gov/pubmed/32665548) is available when opting in. > [!WARNING] -> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; -> see [docs](https://nf-co.re/usage/configuration#custom-configuration-files). +> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/usage/configuration#custom-configuration-files). For more details and further functionality, please refer to the [usage documentation](https://nf-co.re/ampliseq/usage) and the [parameter documentation](https://nf-co.re/ampliseq/parameters). diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index b94a48e9..0a63581f 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,7 +1,7 @@ report_comment: > - This report has been generated by the nf-core/ampliseq + This report has been generated by the nf-core/ampliseq analysis pipeline. For information about how to interpret these results, please see the - documentation. + documentation. report_section_order: "nf-core-ampliseq-methods-description": order: -1000 diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index 7241f69a..a1bc6b4b 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -30,7 +30,6 @@ params: barplot: FALSE abundance_tables: FALSE alpha_rarefaction: FALSE - ancom: FALSE trunclenf: "" trunclenr: "" max_ee: "" @@ -102,6 +101,9 @@ params: diversity_indices_alpha: FALSE diversity_indices_beta: FALSE diversity_indices_adonis: "" + ancom: FALSE + ancombc: FALSE + ancombc_formula: FALSE picrust_pathways: FALSE sbdi: FALSE phyloseq: FALSE @@ -1531,11 +1533,57 @@ Test results are in separate folders following the scheme `Category-{treatment}- ancom <- sort( unlist( strsplit( params$ancom,"," ) ) ) for (folder in ancom) { - ancom_path <- paste0("qiime2/ancom/",folder) + ancom_path <- paste0("qiime2/",folder) cat("\n- [",ancom_path,"/index.html](../",ancom_path,"/index.html)\n", sep="") } ``` + + +```{r, results='asis'} +any_ancombc <- !isFALSE(params$ancombc) || !isFALSE(params$ancombc_formula) +``` + +```{r, eval = !isFALSE(any_ancombc), results='asis'} 
+cat(paste0(" +## ANCOM-BC + +[Analysis of Composition of Microbiomes with Bias Correction (ANCOM-BC)](https://www.ncbi.nlm.nih.gov/pubmed/32665548) +is applied to identify features that are differentially +abundant across sample groups. +Comparisons between groups of samples is performed for specific metadata that can be found in folder +")) + +if ( !isFALSE(params$ancombc) && !isFALSE(params$ancombc_formula) ) { + cat("[qiime2/ancombc/](../qiime2/ancombc/) and [qiime2/ancombc_formula/](../qiime2/ancombc_formula/)") +} else if ( !isFALSE(params$ancombc) ) { + cat("[qiime2/ancombc/](../qiime2/ancombc/)") +} else if ( !isFALSE(params$ancombc_formula) ) { + cat("[qiime2/ancombc_formula/](../qiime2/ancombc_formula/)") +} +cat(".") + +cat(paste0(" +Test results are in separate folders following the scheme `Category-{treatment}-{taxonomic level}`: +")) +``` + +```{r, eval = !isFALSE(params$ancombc), results='asis'} +ancombc <- sort( unlist( strsplit( params$ancombc,"," ) ) ) +for (folder in ancombc) { + ancombc_path <- paste0("qiime2/",folder) + cat("\n- [",ancombc_path,"/index.html](../",ancombc_path,"/index.html)\n", sep="") +} +``` + +```{r, eval = !isFALSE(params$ancombc_formula), results='asis'} +ancombc_formula <- sort( unlist( strsplit( params$ancombc_formula,"," ) ) ) +for (folder in ancombc_formula) { + ancombc_formula_path <- paste0("qiime2/",folder) + cat("\n- [",ancombc_formula_path,"/index.html](../",ancombc_formula_path,"/index.html)\n", sep="") +} +``` + ```{r, eval = !isFALSE(params$picrust_pathways), results='asis'} @@ -1767,7 +1815,7 @@ if ( as.integer(qiime2_filtertaxa_rm) > 0 ) { } ``` ```{r, eval = !isFALSE(params$val_used_taxonomy), results='asis'} -if (!isFALSE(params$barplot) || !isFALSE(params$alpha_rarefaction) || !isFALSE(params$diversity_indices_beta) || !isFALSE(params$ancom)) { +if (!isFALSE(params$barplot) || !isFALSE(params$alpha_rarefaction) || !isFALSE(params$diversity_indices_beta) || !isFALSE(params$ancom) || !isFALSE(any_ancombc)) { 
qiime_final <- c("Within QIIME2, the final microbial community data was") if (!isFALSE(params$barplot)) { qiime_final <- c(qiime_final,"visualized in a barplot") @@ -1782,6 +1830,9 @@ if (!isFALSE(params$barplot) || !isFALSE(params$alpha_rarefaction) || !isFALSE(p if (!isFALSE(params$ancom)) { qiime_final <- c(qiime_final,"used to find differential abundant taxa with ANCOM ([Mandal et al., 2015](https://pubmed.ncbi.nlm.nih.gov/26028277/))") } + if (!isFALSE(any_ancombc)) { + qiime_final <- c(qiime_final,"used to find differential abundant taxa with ANCOM-BC ([Lin and Peddada, 2020](https://pubmed.ncbi.nlm.nih.gov/32665548/))") + } cat(paste(qiime_final[1],qiime_final[2])) if (length(qiime_final) >= 3) { for (x in 3:length(qiime_final)) { diff --git a/assets/schema_input.json b/assets/schema_input.json index 89e9740b..269027c7 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -26,7 +26,7 @@ "format": "file-path", "exists": true, "pattern": "^\\S+\\.f(ast)?q\\.gz$", - "errorMessage": "FastQ file for reads 2 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" + "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" }, "run": { "type": "string", diff --git a/bin/taxref_reformat_gtdb.sh b/bin/taxref_reformat_gtdb.sh index f2e65b56..500a008c 100755 --- a/bin/taxref_reformat_gtdb.sh +++ b/bin/taxref_reformat_gtdb.sh @@ -1,10 +1,15 @@ #!/bin/sh -# Reads the ar* and bac* SSU fasta files from GTDB (after first untarring) +# Reads the ar* and bac* SSU fasta files from GTDB (after first untarring/unzipping) # and outputs two new fasta files, one suitable for DADA2's assignTaxonomy() # and addSpecies() functions. 
-# Untar any tar file in the working directory +# Unzip any .fna.gz file in the working directory - versions 220 and newer +for f in *.fna.gz; do + gunzip -c $f > $(basename "$f" .gz) +done + +# Untar any tar file in the working directory - versions 214.1 and older for f in *.tar.gz; do tar xzf $f done diff --git a/conf/base.config b/conf/base.config index c16be532..32af8a73 100644 --- a/conf/base.config +++ b/conf/base.config @@ -60,9 +60,7 @@ process { errorStrategy = 'retry' maxRetries = 3 } - withName:CUSTOM_DUMPSOFTWAREVERSIONS { - cache = false - } + withName:QIIME2_EXTRACT { cpus = { check_max( 12 * task.attempt, 'cpus' ) } memory = { check_max( 12.GB * task.attempt, 'memory' ) } diff --git a/conf/modules.config b/conf/modules.config index de12098b..c1930250 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -969,6 +969,57 @@ process { ] } + withName: 'QIIME2_ANCOMBC_TAX|QIIME2_ANCOMBC_ASV' { + // additional arguments for "qiime composition ancombc", deviating from default: --p-lib-cut (0), --p-conserve (--p-no-conserve) + ext.args = '--p-prv-cut 0.1 --p-lib-cut 500 --p-alpha 0.05 --p-conserve' + // additional arguments for "qiime composition da-barplot" + ext.args2 = { [ + params.ancombc_effect_size ? "--p-effect-size-threshold ${params.ancombc_effect_size}" : '', + params.ancombc_significance ? "--p-significance-threshold ${params.ancombc_significance}" : '', + '--p-label-limit 1000' + ].join(' ') } + publishDir = [ + [ + path: { "${params.outdir}/qiime2/ancombc" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') || filename.endsWith('.qzv') || filename.endsWith('.qza') ? 
null : filename } + ], + [ + path: { "${params.outdir}/qiime2/ancombc/qza_qzv" }, + mode: params.publish_dir_mode, + pattern: "*{.qza,.qzv}", + enabled: params.save_intermediates + ] + ] + } + + withName: 'ANCOMBC_FORMULA_TAX|ANCOMBC_FORMULA_ASV' { + // additional arguments for "qiime composition ancombc", deviating from default: --p-lib-cut (0), --p-conserve (--p-no-conserve) + ext.args = { [ + params.ancombc_formula_reflvl ? "--p-reference-levels ${params.ancombc_formula_reflvl}" : '', + '--p-prv-cut 0.1 --p-lib-cut 500 --p-alpha 0.05 --p-conserve' + ].join(' ') } + // additional arguments for "qiime composition da-barplot" + ext.args2 = { [ + params.ancombc_effect_size ? "--p-effect-size-threshold ${params.ancombc_effect_size}" : '', + params.ancombc_significance ? "--p-significance-threshold ${params.ancombc_significance}" : '', + '--p-label-limit 1000' + ].join(' ') } + publishDir = [ + [ + path: { "${params.outdir}/qiime2/ancombc_formula" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') || filename.endsWith('.qzv') || filename.endsWith('.qza') ? null : filename } + ], + [ + path: { "${params.outdir}/qiime2/ancombc_formula/qza_qzv" }, + mode: params.publish_dir_mode, + pattern: "*{.qza,.qzv}", + enabled: params.save_intermediates + ] + ] + } + withName: PICRUST { ext.args = "-t epa-ng --remove_intermediate" publishDir = [ @@ -1007,15 +1058,7 @@ process { ] } - withName: CUSTOM_DUMPSOFTWAREVERSIONS { - publishDir = [ - path: { "${params.outdir}/pipeline_info" }, - mode: params.publish_dir_mode, - pattern: '*_versions.yml' - ] - } - - withName: MULTIQC { + withName: 'MULTIQC' { ext.args = { params.multiqc_title ? 
"--title \"$params.multiqc_title\"" : '' } publishDir = [ path: { "${params.outdir}/multiqc" }, diff --git a/conf/ref_databases.config b/conf/ref_databases.config index 24393450..c2db0ed2 100644 --- a/conf/ref_databases.config +++ b/conf/ref_databases.config @@ -26,11 +26,18 @@ params { dbversion = "COIDB 221216 (https://doi.org/10.17044/scilifelab.20514192.v2)" } 'gtdb' { - title = "GTDB - Genome Taxonomy Database - Release R08-RS214.1" - file = [ "https://data.ace.uq.edu.au/public/gtdb/data/releases/release214/214.1/genomic_files_reps/bac120_ssu_reps_r214.tar.gz", "https://data.ace.uq.edu.au/public/gtdb/data/releases/release214/214.1/genomic_files_reps/ar53_ssu_reps_r214.tar.gz" ] + title = "GTDB - Genome Taxonomy Database - Release R09-RS220" + file = [ "https://data.ace.uq.edu.au/public/gtdb/data/releases/release220/220.0/genomic_files_reps/bac120_ssu_reps_r220.fna.gz", "https://data.ace.uq.edu.au/public/gtdb/data/releases/release220/220.0/genomic_files_reps/ar53_ssu_reps_r220.fna.gz" ] citation = "Parks DH, Chuvochina M, Waite DW, Rinke C, Skarshewski A, Chaumeil PA, Hugenholtz P. A standardized bacterial taxonomy based on genome phylogeny substantially revises the tree of life. Nat Biotechnol. 2018 Nov;36(10):996-1004. doi: 10.1038/nbt.4229. Epub 2018 Aug 27. PMID: 30148503." fmtscript = "taxref_reformat_gtdb.sh" - dbversion = "GTDB R08-RS214.1 (https://data.ace.uq.edu.au/public/gtdb/data/releases/release214/214.1)" + dbversion = "GTDB R09-RS220 (https://data.ace.uq.edu.au/public/gtdb/data/releases/release220/220.0)" + } + 'gtdb=R09-RS220' { + title = "GTDB - Genome Taxonomy Database - Release R09-RS220" + file = [ "https://data.ace.uq.edu.au/public/gtdb/data/releases/release220/220.0/genomic_files_reps/bac120_ssu_reps_r220.fna.gz", "https://data.ace.uq.edu.au/public/gtdb/data/releases/release220/220.0/genomic_files_reps/ar53_ssu_reps_r220.fna.gz" ] + citation = "Parks DH, Chuvochina M, Waite DW, Rinke C, Skarshewski A, Chaumeil PA, Hugenholtz P. 
A standardized bacterial taxonomy based on genome phylogeny substantially revises the tree of life. Nat Biotechnol. 2018 Nov;36(10):996-1004. doi: 10.1038/nbt.4229. Epub 2018 Aug 27. PMID: 30148503." + fmtscript = "taxref_reformat_gtdb.sh" + dbversion = "GTDB R09-RS220 (https://data.ace.uq.edu.au/public/gtdb/data/releases/release220/220.0)" } 'gtdb=R08-RS214' { title = "GTDB - Genome Taxonomy Database - Release R08-RS214.1" @@ -123,11 +130,19 @@ params { dbversion = "RDP 18/11.5 (https://zenodo.org/record/4310151/)" } 'sbdi-gtdb' { - title = "SBDI-GTDB - Sativa curated 16S GTDB database - Release R07-RS207-1" - file = [ "https://scilifelab.figshare.com/ndownloader/files/36980767", "https://scilifelab.figshare.com/ndownloader/files/36980788" ] - citation = "Lundin D, Andersson A. SBDI Sativa curated 16S GTDB database. FigShare. doi: 10.17044/scilifelab.14869077.v4" + title = "SBDI-GTDB - Sativa curated 16S GTDB database - Release R08-RS214-1" + file = [ "https://figshare.scilifelab.se/ndownloader/files/45818841", "https://figshare.scilifelab.se/ndownloader/files/45818850" ] + citation = "Lundin D, Andersson A. SBDI Sativa curated 16S GTDB database. FigShare. doi: 10.17044/scilifelab.14869077.v6" fmtscript = "taxref_reformat_sbdi-gtdb.sh" - dbversion = "SBDI-GTDB-R07-RS207-1 (https://scilifelab.figshare.com/articles/dataset/SBDI_Sativa_curated_16S_GTDB_database/14869077/4)" + dbversion = "SBDI-GTDB-R08-RS214-1 (https://figshare.scilifelab.se/articles/dataset/SBDI_Sativa_curated_16S_GTDB_database/14869077/6)" + taxlevels = "Domain,Kingdom,Phylum,Class,Order,Family,Genus,Species" + } + 'sbdi-gtdb=R08-RS214-1' { + title = "SBDI-GTDB - Sativa curated 16S GTDB database - Release R08-RS214-1" + file = [ "https://figshare.scilifelab.se/ndownloader/files/45818841", "https://figshare.scilifelab.se/ndownloader/files/45818850" ] + citation = "Lundin D, Andersson A. SBDI Sativa curated 16S GTDB database. FigShare. 
doi: 10.17044/scilifelab.14869077.v6" + fmtscript = "taxref_reformat_sbdi-gtdb.sh" + dbversion = "SBDI-GTDB-R08-RS214-1 (https://figshare.scilifelab.se/articles/dataset/SBDI_Sativa_curated_16S_GTDB_database/14869077/6)" taxlevels = "Domain,Kingdom,Phylum,Class,Order,Family,Genus,Species" } 'sbdi-gtdb=R07-RS207-1' { diff --git a/conf/test.config b/conf/test.config index afd370e4..6c75a9cf 100644 --- a/conf/test.config +++ b/conf/test.config @@ -22,8 +22,8 @@ params { // Input data FW_primer = "GTGYCAGCMGCCGCGGTAA" RV_primer = "GGACTACNVGGGTWTCTAAT" - input = "https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/samplesheets/Samplesheet.tsv" - metadata = "https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/samplesheets/Metadata.tsv" + input = params.pipelines_testdata_base_path + "ampliseq/samplesheets/Samplesheet.tsv" + metadata = params.pipelines_testdata_base_path + "ampliseq/samplesheets/Metadata.tsv" dada_ref_taxonomy = "gtdb=R07-RS207" cut_dada_ref_taxonomy = true qiime_ref_taxonomy = "greengenes85" @@ -47,4 +47,11 @@ params { diversity_rarefaction_depth = 500 vsearch_cluster = true + + // Test ANCOMBC + ancombc = true + ancombc_formula = "treatment1" + ancombc_formula_reflvl = "treatment1::b" + ancombc_effect_size = 2 + ancombc_significance = 0.00001 } diff --git a/conf/test_doubleprimers.config b/conf/test_doubleprimers.config index 730393db..8a6e1e86 100644 --- a/conf/test_doubleprimers.config +++ b/conf/test_doubleprimers.config @@ -23,7 +23,7 @@ params { FW_primer = "NNNNCCTAHGGGRBGCAGCAG" RV_primer = "GACTACHVGGGTATCTAATCC" double_primer = true - input = "https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/samplesheets/Samplesheet_double_primer.tsv" + input = params.pipelines_testdata_base_path + "ampliseq/samplesheets/Samplesheet_double_primer.tsv" trunc_qmin = 30 kraken2_ref_taxonomy = "greengenes" diff --git a/conf/test_failed.config b/conf/test_failed.config index 12509a25..157a2f2a 100644 --- 
a/conf/test_failed.config +++ b/conf/test_failed.config @@ -22,8 +22,8 @@ params { // Input data FW_primer = "GTGYCAGCMGCCGCGGTAA" RV_primer = "GGACTACNVGGGTWTCTAAT" - input = "https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/samplesheets/Samplesheet_failed_sample.tsv" - metadata = "https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/samplesheets/Metadata_failed_sample.tsv" + input = params.pipelines_testdata_base_path + "ampliseq/samplesheets/Samplesheet_failed_sample.tsv" + metadata = params.pipelines_testdata_base_path + "ampliseq/samplesheets/Metadata_failed_sample.tsv" dada_ref_tax_custom = "https://zenodo.org/record/4310151/files/rdp_train_set_18.fa.gz" skip_dada_addspecies = true cut_dada_ref_taxonomy = true @@ -32,6 +32,7 @@ params { ignore_failed_trimming = true ignore_empty_input_files = true ignore_failed_filtering = true + ancombc = true //this is to remove low abundance ASVs to reduce runtime of downstream processes min_samples = 2 diff --git a/conf/test_fasta.config b/conf/test_fasta.config index fbb60f87..e5ffa3e2 100644 --- a/conf/test_fasta.config +++ b/conf/test_fasta.config @@ -20,7 +20,7 @@ params { max_time = '6.h' // Input data - input_fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/testdata/ASV_seqs.fasta" + input_fasta = params.pipelines_testdata_base_path + "ampliseq/testdata/ASV_seqs.fasta" dada_ref_taxonomy = "rdp=18" dada_assign_taxlevels = "K,P,C,O,F,Genus" diff --git a/conf/test_full.config b/conf/test_full.config index a17d4fa8..127b4acc 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -17,8 +17,8 @@ params { // Input data for full size test FW_primer = "GTGYCAGCMGCCGCGGTAA" RV_primer = "GGACTACNVGGGTWTCTAAT" - input = "https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/samplesheets/Samplesheet_full.tsv" - metadata = "https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/samplesheets/Metadata_full.tsv" + input = 
params.pipelines_testdata_base_path + "ampliseq/samplesheets/Samplesheet_full.tsv" + metadata = params.pipelines_testdata_base_path + "ampliseq/samplesheets/Metadata_full.tsv" dada_ref_taxonomy = "rdp" qiime_ref_taxonomy = "greengenes85" trunc_qmin = 35 @@ -35,4 +35,8 @@ params { //run adonis qiime_adonis_formula = "habitat" + + //run ANCOM & ANCOMBC + ancom = true + ancombc = true } diff --git a/conf/test_iontorrent.config b/conf/test_iontorrent.config index a8b94947..aa53e40b 100644 --- a/conf/test_iontorrent.config +++ b/conf/test_iontorrent.config @@ -23,7 +23,7 @@ params { FW_primer = "GTGARTCATCGARTCTTTG" RV_primer = "TCCTCSSCTTATTGATATGC" sintax_ref_taxonomy = "unite-fungi=8.2" - input = "https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/samplesheets/Samplesheet_it_SE_ITS.tsv" + input = params.pipelines_testdata_base_path + "ampliseq/samplesheets/Samplesheet_it_SE_ITS.tsv" iontorrent = true max_ee = 5 skip_qiime = true diff --git a/conf/test_multi.config b/conf/test_multi.config index 20cf7c29..aaa07a81 100644 --- a/conf/test_multi.config +++ b/conf/test_multi.config @@ -26,5 +26,5 @@ params { skip_dada_quality = true dada_ref_taxonomy = "rdp=18" skip_dada_addspecies = true - input = "https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/samplesheets/Samplesheet_multi.tsv" + input = params.pipelines_testdata_base_path + "ampliseq/samplesheets/Samplesheet_multi.tsv" } diff --git a/conf/test_multiregion.config b/conf/test_multiregion.config index 71518374..41fa66ff 100644 --- a/conf/test_multiregion.config +++ b/conf/test_multiregion.config @@ -20,9 +20,9 @@ params { max_time = '6.h' // Input data - input = "https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/samplesheets/samplesheet_multiregion.tsv" - metadata = "https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/samplesheets/metadata_multiregion.tsv" - multiregion = 
"https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/samplesheets/regions_multiregion.tsv" + input = params.pipelines_testdata_base_path + "ampliseq/samplesheets/samplesheet_multiregion.tsv" + metadata = params.pipelines_testdata_base_path + "ampliseq/samplesheets/metadata_multiregion.tsv" + multiregion = params.pipelines_testdata_base_path + "ampliseq/samplesheets/regions_multiregion.tsv" sidle_ref_taxonomy = "greengenes88" // Prevent default taxonomic classification @@ -31,4 +31,7 @@ params { // Reduce runtimes skip_alpha_rarefaction = true tax_agglom_max = 3 + + // Run ANCOM + ancom = true } diff --git a/conf/test_novaseq.config b/conf/test_novaseq.config index b82e4d27..9e096335 100644 --- a/conf/test_novaseq.config +++ b/conf/test_novaseq.config @@ -20,7 +20,7 @@ params { max_time = '6.h' // Input data - input = "https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/samplesheets/Samplesheet_novaseq.tsv" + input = params.pipelines_testdata_base_path + "ampliseq/samplesheets/Samplesheet_novaseq.tsv" illumina_novaseq = true // Codon filter, only for the first 2 codons (because otherwise all is filtered) diff --git a/conf/test_pacbio_its.config b/conf/test_pacbio_its.config index fea19133..64d89ea8 100644 --- a/conf/test_pacbio_its.config +++ b/conf/test_pacbio_its.config @@ -23,8 +23,8 @@ params { FW_primer = "CTTGGTCATTTAGAGGAAGTAA" RV_primer = "TCCTGAGGGAAACTTCG" sintax_ref_taxonomy = "unite-fungi=8.2" - input = "https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/samplesheets/Samplesheet_pacbio_ITS.tsv" - metadata = "https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/samplesheets/Metadata_pacbio_ITS.tsv" + input = params.pipelines_testdata_base_path + "ampliseq/samplesheets/Samplesheet_pacbio_ITS.tsv" + metadata = params.pipelines_testdata_base_path + "ampliseq/samplesheets/Metadata_pacbio_ITS.tsv" pacbio = true max_ee = 12 cut_its = "full" diff --git a/conf/test_pplace.config b/conf/test_pplace.config index 
ecd5424d..e47861e0 100644 --- a/conf/test_pplace.config +++ b/conf/test_pplace.config @@ -22,8 +22,8 @@ params { // Input data FW_primer = "GTGYCAGCMGCCGCGGTAA" RV_primer = "GGACTACNVGGGTWTCTAAT" - input = "https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/samplesheets/Samplesheet.tsv" - metadata = "https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/samplesheets/Metadata.tsv" + input = params.pipelines_testdata_base_path + "ampliseq/samplesheets/Samplesheet.tsv" + metadata = params.pipelines_testdata_base_path + "ampliseq/samplesheets/Metadata.tsv" skip_dada_taxonomy = true qiime_ref_taxonomy = "greengenes85" filter_ssu = "bac" @@ -33,10 +33,10 @@ params { min_frequency = 10 // pplace - pplace_tree = "https://github.com/nf-core/test-datasets/raw/phyloplace/testdata/cyanos_16s.newick" - pplace_aln = "https://github.com/nf-core/test-datasets/raw/phyloplace/testdata/cyanos_16s.alnfna" + pplace_tree = params.pipelines_testdata_base_path + "phyloplace/testdata/cyanos_16s.newick" + pplace_aln = params.pipelines_testdata_base_path + "phyloplace/testdata/cyanos_16s.alnfna" pplace_model = "GTR+F+I+I+R3" - pplace_taxonomy = "https://github.com/nf-core/test-datasets/raw/phyloplace/testdata/cyanos_16s.taxonomy.tsv" + pplace_taxonomy = params.pipelines_testdata_base_path + "phyloplace/testdata/cyanos_16s.taxonomy.tsv" pplace_name = "test_pplace" // Adjust taxonomic levels @@ -46,5 +46,4 @@ params { // Skip some steps to reduce runtime skip_alpha_rarefaction = true skip_fastqc = true - skip_ancom = true } diff --git a/conf/test_qiimecustom.config b/conf/test_qiimecustom.config index dd02eb4e..e3358fa0 100644 --- a/conf/test_qiimecustom.config +++ b/conf/test_qiimecustom.config @@ -22,10 +22,10 @@ params { // Input data FW_primer = "GTGYCAGCMGCCGCGGTAA" RV_primer = "GGACTACNVGGGTWTCTAAT" - input = "https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/samplesheets/Samplesheet.tsv" + input = params.pipelines_testdata_base_path + 
"ampliseq/samplesheets/Samplesheet.tsv" // Custom reference taxonomy - qiime_ref_tax_custom = "https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/testdata/85_greengenes.fna.gz,https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/testdata/85_greengenes.tax.gz" + qiime_ref_tax_custom = params.pipelines_testdata_base_path + "ampliseq/testdata/85_greengenes.fna.gz,https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/testdata/85_greengenes.tax.gz" // Skip downstream analysis with QIIME2 skip_qiime_downstream = true diff --git a/conf/test_reftaxcustom.config b/conf/test_reftaxcustom.config index 40408bfb..fa779ff5 100644 --- a/conf/test_reftaxcustom.config +++ b/conf/test_reftaxcustom.config @@ -22,7 +22,7 @@ params { // Input data FW_primer = "GTGYCAGCMGCCGCGGTAA" RV_primer = "GGACTACNVGGGTWTCTAAT" - input = "https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/samplesheets/Samplesheet.tsv" + input = params.pipelines_testdata_base_path + "ampliseq/samplesheets/Samplesheet.tsv" // Custom reference taxonomy dada_ref_tax_custom = "https://zenodo.org/record/4310151/files/rdp_train_set_18.fa.gz" @@ -30,7 +30,7 @@ params { dada_assign_taxlevels = "Kingdom,Phylum,Class,Order,Family,Genus" kraken2_ref_tax_custom = "https://genome-idx.s3.amazonaws.com/kraken/16S_Greengenes13.5_20200326.tgz" kraken2_assign_taxlevels = "D,P,C,O" - qiime_ref_tax_custom = "https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/testdata/85_greengenes.tar.gz" + qiime_ref_tax_custom = params.pipelines_testdata_base_path + "ampliseq/testdata/85_greengenes.tar.gz" // Skip downstream analysis with QIIME2 skip_qiime_downstream = true diff --git a/conf/test_single.config b/conf/test_single.config index b24e852b..3470f5c5 100644 --- a/conf/test_single.config +++ b/conf/test_single.config @@ -22,7 +22,7 @@ params { // Input data FW_primer = "GTGYCAGCMGCCGCGGTAA" RV_primer = "GGACTACNVGGGTWTCTAAT" - input = 
"https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/samplesheets/Samplesheet.tsv" + input = params.pipelines_testdata_base_path + "ampliseq/samplesheets/Samplesheet.tsv" single_end = true dada_ref_taxonomy = "rdp=18" cut_dada_ref_taxonomy = true diff --git a/conf/test_sintax.config b/conf/test_sintax.config index 0021d195..9f1c4f04 100644 --- a/conf/test_sintax.config +++ b/conf/test_sintax.config @@ -22,8 +22,8 @@ params { // Input data FW_primer = "CTTGGTCATTTAGAGGAAGTAA" RV_primer = "TCCTGAGGGAAACTTCG" - input = "https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/samplesheets/Samplesheet_pacbio_ITS.tsv" - metadata = "https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/samplesheets/Metadata_pacbio_ITS.tsv" + input = params.pipelines_testdata_base_path + "ampliseq/samplesheets/Samplesheet_pacbio_ITS.tsv" + metadata = params.pipelines_testdata_base_path + "ampliseq/samplesheets/Metadata_pacbio_ITS.tsv" pacbio = true max_ee = 12 cut_its = "its2" @@ -40,6 +40,7 @@ params { //restrict ANCOM analysis to higher taxonomic levels tax_agglom_max = 4 + ancom = true sbdiexport = true diff --git a/docs/images/ampliseq_workflow.png b/docs/images/ampliseq_workflow.png index 089035ef..7cc2c4ba 100644 Binary files a/docs/images/ampliseq_workflow.png and b/docs/images/ampliseq_workflow.png differ diff --git a/docs/images/ampliseq_workflow.svg b/docs/images/ampliseq_workflow.svg index af474614..ce777230 100644 --- a/docs/images/ampliseq_workflow.svg +++ b/docs/images/ampliseq_workflow.svg @@ -265,8 +265,8 @@ showgrid="false" inkscape:current-layer="layer1" inkscape:document-units="mm" - inkscape:cy="153.93758" - inkscape:cx="405.08879" + inkscape:cy="149.93758" + inkscape:cx="208.08879" inkscape:zoom="1" inkscape:pageshadow="2" inkscape:pageopacity="0.0" @@ -934,7 +934,7 @@ height="7.7119999" width="31.516283" id="rect4770-6-8-5-4-1-4" - 
style="fill:#24af63;fill-opacity:1;stroke:#000000;stroke-width:0.30000001;stroke-linecap:square;stroke-linejoin:bevel;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;paint-order:normal" /> + style="fill:none;fill-opacity:1;stroke:#000000;stroke-width:0.30000001;stroke-linecap:square;stroke-linejoin:bevel;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;paint-order:normal" /> Output files - `qiime2/rel_abundance_tables/` - - `rel-table-*.tsv`: Tab-separated absolute abundance table at taxa level `*`, where `*` ranges by default from 2 to 6, specified by the `--tax_agglom_min` and `--tax_agglom_max` parameters. + - `rel-table-*.tsv`: Tab-separated relative abundance table at taxa level `*`, where `*` ranges by default from 2 to 6, specified by the `--tax_agglom_min` and `--tax_agglom_max` parameters. - `rel-table-ASV.tsv`: Tab-separated relative abundance table for all ASVs. - - `rel-table-ASV_with-DADA2-tax.tsv`: Tab-separated table for all ASVs with DADA2 taxonomic classification, sequence and relative abundance. - - `rel-table-ASV_with-QIIME2-tax.tsv`: Tab-separated table for all ASVs with QIIME2 taxonomic classification, sequence and relative abundance. - - `rel-table-ASV_with-PPLACE-tax.tsv`: Tab-separated table for all ASVs with EPA-NG - Gappa taxonomic classification, sequence and relative abundance. + - `rel-table-ASV_with-*-tax.tsv`: Tab-separated table for all ASVs with taxonomic classification, sequence and relative abundance. The star (\*) is replaced by the taxonomic classification method. @@ -546,11 +544,13 @@ Furthermore, ADONIS permutation-based statistical test in vegan-R determine whet -#### ANCOM +#### Differential abundance analysis -Analysis of Composition of Microbiomes ([ANCOM](https://www.ncbi.nlm.nih.gov/pubmed/26028277)) is applied to identify features that are differentially abundant across sample groups. 
A key assumption made by ANCOM is that few taxa (less than about 25%) will be differentially abundant between groups otherwise the method will be inaccurate. Parameter `--ancom_sample_min_count` sets the minimum sample counts to retain a sample for ANCOM analysis. +##### ANCOM -ANCOM is applied to each suitable or specified metadata column for 5 taxonomic levels (2-6). +Analysis of Composition of Microbiomes ([ANCOM](https://www.ncbi.nlm.nih.gov/pubmed/26028277)) is applied to identify features that are differentially abundant across sample groups. A key assumption made by ANCOM is that few taxa (less than about 25%) will be differentially abundant between groups otherwise the method will be inaccurate. + +On request (`--ancom`), ANCOM is applied to each suitable or specified metadata column for 5 taxonomic levels (2-6).
Output files @@ -562,6 +562,28 @@ ANCOM is applied to each suitable or specified metadata column for 5 taxonomic l
+##### ANCOM-BC + +Analysis of Composition of Microbiomes with Bias Correction ([ANCOM-BC](https://www.ncbi.nlm.nih.gov/pubmed/32665548)) is applied to identify features that are differentially abundant across sample groups. + +On request (`--ancombc`), ANCOM-BC is applied to each suitable or specified metadata column for 5 taxonomic levels (2-6). Independently, multiple comma separated formula can be submitted to ANCOM-BC by `--ancombc_formula`. + +
+Output files + +- `qiime2/ancombc/` or `qiime2/ancombc_formula/` + - `da_barplot/Category--/` + - `index.html`: Links to interactive plots. + - `-ancombc-barplot.html`: Interactive plots. + - `differentials/Category--/` + - `index.html`: Visualised table of statistical results. + - `*.csv*`: Comma-separated tables of statistical results. + - formula: metadata category / formula that was tested + - taxonomic level: level-2 (phylum), level-3 (class), level-4 (order), level-5 (family), level-6 (genus), ASV + - treatment: Changes for that treatment group + +
+ ### PICRUSt2 PICRUSt2 (Phylogenetic Investigation of Communities by Reconstruction of Unobserved States) is a software for predicting functional abundances based only on marker gene sequences. On demand (`--picrust`), Enzyme Classification numbers (EC), KEGG orthologs (KO) and MetaCyc ontology predictions will be made for each sample. diff --git a/docs/usage.md b/docs/usage.md index 7e9a9870..a7dad2b6 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -16,6 +16,7 @@ - [Taxonomic classification](#taxonomic-classification) - [Multiple region analysis with Sidle](#multiple-region-analysis-with-sidle) - [Metadata](#metadata) + - [Differential abundance analysis](#differential-abundance-analysis) - [Updating the pipeline](#updating-the-pipeline) - [Reproducibility](#reproducibility) - [Core Nextflow arguments](#core-nextflow-arguments) @@ -306,6 +307,10 @@ Sample identifiers should be 36 characters long or less, and also contain only A The columns which are to be assessed can be specified by `--metadata_category`. If `--metadata_category` isn't specified than all columns that fit the specification are automatically chosen. +### Differential abundance analysis + +Differential abundance analysis for relative abundance from microbial community analysis are plagued by multiple issues that aren't fully solved yet. But some approaches seem promising, for example Analysis of Composition of Microbiomes with Bias Correction ([ANCOM-BC](https://pubmed.ncbi.nlm.nih.gov/32665548/)). [ANCOM](https://pubmed.ncbi.nlm.nih.gov/26028277/) and ANCOM-BC are integrated into the pipeline, but only executed on request via `--ancom` and `--ancombc`, more details in the [nf-core/ampliseq website parameter documentation](https://nf-co.re/ampliseq/parameters/#differential-abundance-analysis). + ### Updating the pipeline When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. 
When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline: @@ -366,6 +371,8 @@ If `-profile` is not specified, the pipeline will run locally and expect all sof - A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/) - `apptainer` - A generic configuration profile to be used with [Apptainer](https://apptainer.org/) +- `wave` + - A generic configuration profile to enable [Wave](https://seqera.io/wave/) containers. Use together with one of the above (requires Nextflow ` 24.03.0-edge` or later). - `conda` - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter, Charliecloud, or Apptainer. 
diff --git a/modules.json b/modules.json index 52d78e82..444cbe2c 100644 --- a/modules.json +++ b/modules.json @@ -22,7 +22,7 @@ }, "fastqc": { "branch": "master", - "git_sha": "f4ae1d942bd50c5c0b9bd2de1393ce38315ba57c", + "git_sha": "285a50500f9e02578d90b3ce6382ea3c30216acd", "installed_by": ["modules"] }, "gappa/examineassign": { @@ -118,7 +118,7 @@ }, "utils_nfcore_pipeline": { "branch": "master", - "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", + "git_sha": "92de218a329bfc9a9033116eb5f65fd270e72ba3", "installed_by": ["subworkflows"] }, "utils_nfvalidation_plugin": { diff --git a/modules/local/dada2_stats.nf b/modules/local/dada2_stats.nf index eb36195b..788d54f2 100644 --- a/modules/local/dada2_stats.nf +++ b/modules/local/dada2_stats.nf @@ -41,14 +41,23 @@ process DADA2_STATS { dadaFs = readRDS("${denoised[0]}") dadaRs = readRDS("${denoised[1]}") mergers = readRDS("$mergers") - seqtab.nochim = readRDS("$seqtab_nochim") + nochim = readRDS("$seqtab_nochim") #track reads through pipeline getN <- function(x) sum(getUniques(x)) if ( nrow(filter_and_trim) == 1 ) { - track <- cbind(filter_and_trim, getN(dadaFs), getN(dadaRs), getN(mergers), rowSums(seqtab.nochim)) + track <- cbind(filter_and_trim, getN(dadaFs), getN(dadaRs), getN(mergers), rowSums(nochim)) } else { - track <- cbind(filter_and_trim, sapply(dadaFs, getN), sapply(dadaRs, getN), sapply(mergers, getN), rowSums(seqtab.nochim)) + dadaFs_getN <- data.frame( sapply(dadaFs, getN) ) + dadaRs_getN <- data.frame( sapply(dadaRs, getN) ) + mergers_getN <- data.frame( sapply(mergers, getN) ) + nochim_rowSums <- data.frame( rowSums(nochim) ) + track <- cbind( + filter_and_trim[order(rownames(filter_and_trim)), ], + dadaFs_getN[order(rownames(dadaFs_getN)), ], + dadaRs_getN[order(rownames(dadaRs_getN)), ], + mergers_getN[order(rownames(mergers_getN)), ], + nochim_rowSums[order(rownames(nochim_rowSums)), ] ) } colnames(track) <- c("DADA2_input", "filtered", "denoisedF", "denoisedR", "merged", "nonchim") 
rownames(track) <- sub(pattern = "_1.fastq.gz\$", replacement = "", rownames(track)) #this is when cutadapt is skipped! @@ -77,14 +86,19 @@ process DADA2_STATS { #read data dadaFs = readRDS("${denoised[0]}") - seqtab.nochim = readRDS("$seqtab_nochim") + nochim = readRDS("$seqtab_nochim") #track reads through pipeline getN <- function(x) sum(getUniques(x)) if ( nrow(filter_and_trim) == 1 ) { - track <- cbind(filter_and_trim, getN(dadaFs), rowSums(seqtab.nochim)) + track <- cbind(filter_and_trim, getN(dadaFs), rowSums(nochim)) } else { - track <- cbind(filter_and_trim, sapply(dadaFs, getN), rowSums(seqtab.nochim)) + dadaFs_getN <- data.frame( sapply(dadaFs, getN) ) + nochim_rowSums <- data.frame( rowSums(nochim) ) + track <- cbind( + filter_and_trim[order(rownames(filter_and_trim)), ], + dadaFs_getN[order(rownames(dadaFs_getN)), ], + nochim_rowSums[order(rownames(nochim_rowSums)), ] ) } colnames(track) <- c("DADA2_input", "filtered", "denoised", "nonchim") track <- cbind(sample = sub(pattern = "(.*?)\\\\..*\$", replacement = "\\\\1", rownames(track)), track) diff --git a/modules/local/qiime2_ancombc_asv.nf b/modules/local/qiime2_ancombc_asv.nf new file mode 100644 index 00000000..22f264da --- /dev/null +++ b/modules/local/qiime2_ancombc_asv.nf @@ -0,0 +1,68 @@ +process QIIME2_ANCOMBC_ASV { + tag "${table.baseName} ${formula}" + label 'process_medium' + label 'single_cpu' + label 'process_long' + label 'error_ignore' + + container "qiime2/core:2023.7" + + input: + tuple path(metadata), path(table), val(formula) + + output: + path("da_barplot/*") , emit: da_barplot + path("differentials/*"), emit: differentials + path("*.qza") , emit: qza + path("*.qzv") , emit: qzv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "QIIME2 does not support Conda. 
Please use Docker / Singularity / Podman instead." + } + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def formula = formula ?: "${table.baseName}" + """ + export XDG_CONFIG_HOME="./xdgconfig" + export MPLCONFIGDIR="./mplconfigdir" + export NUMBA_CACHE_DIR="./numbacache" + + qiime composition ancombc \\ + --i-table "${table}" \\ + --m-metadata-file "${metadata}" \\ + $args \\ + --p-formula '${formula}' \\ + --o-differentials "${formula}.differentials.qza" \\ + --verbose + qiime tools export \\ + --input-path "${formula}.differentials.qza" \\ + --output-path "differentials/Category-${formula}-ASV" + + # Generate tabular view of ANCOM-BC output + qiime composition tabulate \\ + --i-data "${formula}.differentials.qza" \\ + --o-visualization "${formula}.differentials.qzv" + qiime tools export \\ + --input-path "${formula}.differentials.qzv" \\ + --output-path "differentials/Category-${formula}-ASV" + + # Generate bar plot views of ANCOM-BC output + qiime composition da-barplot \\ + --i-data "${formula}.differentials.qza" \\ + $args2 \\ + --o-visualization "${formula}.da_barplot.qzv" + qiime tools export --input-path "${formula}.da_barplot.qzv" \\ + --output-path "da_barplot/Category-${formula}-ASV" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + qiime2: \$( qiime --version | sed '1!d;s/.* //' ) + END_VERSIONS + """ +} diff --git a/modules/local/qiime2_ancombc_tax.nf b/modules/local/qiime2_ancombc_tax.nf new file mode 100644 index 00000000..4d1a6da5 --- /dev/null +++ b/modules/local/qiime2_ancombc_tax.nf @@ -0,0 +1,91 @@ +process QIIME2_ANCOMBC_TAX { + tag "${table.baseName} ${formula} - ${taxlevel}" + label 'process_medium' + label 'single_cpu' + + container "qiime2/core:2023.7" + + input: + tuple path(metadata), path(table), path(taxonomy), val(taxlevel), val(formula) + + output: + path("da_barplot/*") , emit: da_barplot + path("differentials/*"), emit: differentials + path("*.qza") , emit: qza, optional: true + path("*.qzv") , 
emit: qzv, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "QIIME2 does not support Conda. Please use Docker / Singularity / Podman instead." + } + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def formula = formula ?: "${table.baseName}" + def prefix = "lvl${taxlevel}-${formula}" + def outfolder = "Category-${formula}-level-${taxlevel}" + """ + export XDG_CONFIG_HOME="./xdgconfig" + export MPLCONFIGDIR="./mplconfigdir" + export NUMBA_CACHE_DIR="./numbacache" + + # Sum data at the specified level + qiime taxa collapse \\ + --i-table "${table}" \\ + --i-taxonomy "${taxonomy}" \\ + --p-level ${taxlevel} \\ + --o-collapsed-table "${prefix}.qza" + + # Extract summarised table and output a file with the number of taxa + qiime tools export \\ + --input-path "${prefix}.qza" \\ + --output-path exported/ + biom convert \\ + -i exported/feature-table.biom \\ + -o "${prefix}.feature-table.tsv" \\ + --to-tsv + + if [ \$(grep -v '^#' -c "${prefix}.feature-table.tsv") -lt 2 ]; then + mkdir differentials + echo ${taxlevel} > differentials/\"WARNING Summing your data at taxonomic level ${taxlevel} produced less than two rows (taxa), ANCOMBC can't proceed -- did you specify a bad reference taxonomy?\".txt + mkdir da_barplot + echo ${taxlevel} > da_barplot/\"WARNING Summing your data at taxonomic level ${taxlevel} produced less than two rows (taxa), ANCOMBC can't proceed -- did you specify a bad reference taxonomy?\".txt + else + qiime composition ancombc \\ + --i-table "${prefix}.qza" \\ + --m-metadata-file "${metadata}" \\ + $args \\ + --p-formula '${formula}' \\ + --o-differentials "${prefix}.differentials.qza" \\ + --verbose + qiime tools export \\ + --input-path "${prefix}.differentials.qza" \\ + --output-path 
"differentials/${outfolder}" + + # Generate tabular view of ANCOM-BC output + qiime composition tabulate \\ + --i-data "${prefix}.differentials.qza" \\ + --o-visualization "${prefix}.differentials.qzv" + qiime tools export \\ + --input-path "${prefix}.differentials.qzv" \\ + --output-path "differentials/${outfolder}" + + # Generate bar plot views of ANCOM-BC output + qiime composition da-barplot \\ + --i-data "${prefix}.differentials.qza" \\ + $args2 \\ + --o-visualization "${prefix}.da_barplot.qzv" + qiime tools export --input-path "${prefix}.da_barplot.qzv" \\ + --output-path "da_barplot/${outfolder}" + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + qiime2: \$( qiime --version | sed '1!d;s/.* //' ) + END_VERSIONS + """ +} diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index 2306a061..309dcdf7 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -52,7 +52,9 @@ process SUMMARY_REPORT { path(diversity_indices_alpha, stageAs: 'alpha_diversity/*') // prevent folder name collisons path(diversity_indices_beta, stageAs: 'beta_diversity/*') // prevent folder name collisons path(diversity_indices_adonis, stageAs: 'beta_diversity/adonis/*') // prevent folder name collisons - path(ancom) + path(ancom, stageAs: 'ancom/*') + path(ancombc, stageAs: 'ancombc/da_barplot/*') + path(ancombc_formula, stageAs: 'ancombc_formula/da_barplot/*') path(picrust_pathways) path(sbdi, stageAs: 'sbdi/*') path(phyloseq, stageAs: 'phyloseq/*') @@ -131,6 +133,8 @@ process SUMMARY_REPORT { diversity_indices_beta ? "diversity_indices_beta='"+ diversity_indices_beta.join(",") +"'" : "", diversity_indices_adonis ? "diversity_indices_adonis='"+ diversity_indices_adonis.join(",") +"',qiime_adonis_formula='$params.qiime_adonis_formula'" : "", ancom ? "ancom='"+ ancom.join(",") +"'" : "", + ancombc ? "ancombc='"+ ancombc.join(",") +"'" : "", + ancombc_formula ? 
"ancombc_formula='"+ ancombc_formula.join(",") +"'" : "", sbdi ? "sbdi='"+ sbdi.join(",") +"'" : "", phyloseq ? "phyloseq='"+ phyloseq.join(",") +"'" : "", ] diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf index 9e19a74c..d79f1c86 100644 --- a/modules/nf-core/fastqc/main.nf +++ b/modules/nf-core/fastqc/main.nf @@ -25,6 +25,11 @@ process FASTQC { def old_new_pairs = reads instanceof Path || reads.size() == 1 ? [[ reads, "${prefix}.${reads.extension}" ]] : reads.withIndex().collect { entry, index -> [ entry, "${prefix}_${index + 1}.${entry.extension}" ] } def rename_to = old_new_pairs*.join(' ').join(' ') def renamed_files = old_new_pairs.collect{ old_name, new_name -> new_name }.join(' ') + + def memory_in_mb = MemoryUnit.of("${task.memory}").toUnit('MB') + // FastQC memory value allowed range (100 - 10000) + def fastqc_memory = memory_in_mb > 10000 ? 10000 : (memory_in_mb < 100 ? 100 : memory_in_mb) + """ printf "%s %s\\n" $rename_to | while read old_name new_name; do [ -f "\${new_name}" ] || ln -s \$old_name \$new_name @@ -33,6 +38,7 @@ process FASTQC { fastqc \\ $args \\ --threads $task.cpus \\ + --memory $fastqc_memory \\ $renamed_files cat <<-END_VERSIONS > versions.yml diff --git a/nextflow.config b/nextflow.config index b51c779a..81124505 100644 --- a/nextflow.config +++ b/nextflow.config @@ -76,6 +76,12 @@ params { ancom_sample_min_count = 1 vsearch_cluster = null vsearch_cluster_id = 0.97 + ancom = false + ancombc = false + ancombc_effect_size = 1 + ancombc_significance = 0.05 + ancombc_formula = null + ancombc_formula_reflvl = null // Report options report_template = "${projectDir}/assets/report_template.Rmd" @@ -99,7 +105,6 @@ params { skip_dada_addspecies = false skip_alpha_rarefaction = false skip_diversity_indices = false - skip_ancom = false skip_multiqc = false skip_report = false @@ -130,15 +135,16 @@ params { multiqc_methods_description = null // Boilerplate options - outdir = null - publish_dir_mode = 'copy' - email = 
null - email_on_fail = null - plaintext_email = false - monochrome_logs = false - hook_url = null - help = false - version = false + outdir = null + publish_dir_mode = 'copy' + email = null + email_on_fail = null + plaintext_email = false + monochrome_logs = false + hook_url = null + help = false + version = false + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/' // Config options config_profile_name = null @@ -174,103 +180,109 @@ try { } // Load nf-core/ampliseq custom profiles from different institutions. -// Warning: Uncomment only if a pipeline-specific institutional config already exists on nf-core/configs! -// try { -// includeConfig "${params.custom_config_base}/pipeline/ampliseq.config" -// } catch (Exception e) { -// System.err.println("WARNING: Could not load nf-core/config/ampliseq profiles: ${params.custom_config_base}/pipeline/ampliseq.config") -// } +try { + includeConfig "${params.custom_config_base}/pipeline/ampliseq.config" +} catch (Exception e) { + System.err.println("WARNING: Could not load nf-core/config/ampliseq profiles: ${params.custom_config_base}/pipeline/ampliseq.config") +} profiles { debug { - dumpHashes = true - process.beforeScript = 'echo $HOSTNAME' - cleanup = false + dumpHashes = true + process.beforeScript = 'echo $HOSTNAME' + cleanup = false nextflow.enable.configProcessNamesValidation = true } conda { - conda.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - channels = ['conda-forge', 'bioconda', 'defaults'] - apptainer.enabled = false + conda.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + conda.channels = ['conda-forge', 'bioconda', 'defaults'] + apptainer.enabled = false } mamba { - conda.enabled = true - conda.useMamba = true - docker.enabled = false - singularity.enabled = 
false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + conda.enabled = true + conda.useMamba = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } docker { - docker.enabled = true - conda.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false - docker.runOptions = '-u $(id -u):$(id -g)' + docker.enabled = true + conda.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + docker.runOptions = '-u $(id -u):$(id -g)' } arm { - docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' } singularity { - singularity.enabled = true - singularity.autoMounts = true - conda.enabled = false - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + singularity.enabled = true + singularity.autoMounts = true + conda.enabled = false + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } podman { - podman.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + podman.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } shifter { - shifter.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + 
shifter.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } charliecloud { - charliecloud.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - apptainer.enabled = false + charliecloud.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + apptainer.enabled = false } apptainer { - apptainer.enabled = true - apptainer.autoMounts = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false + apptainer.enabled = true + apptainer.autoMounts = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + wave { + apptainer.ociAutoPull = true + singularity.ociAutoPull = true + wave.enabled = true + wave.freeze = true + wave.strategy = 'conda,container' } gitpod { - executor.name = 'local' - executor.cpus = 4 - executor.memory = 8.GB + executor.name = 'local' + executor.cpus = 4 + executor.memory = 8.GB } test { includeConfig 'conf/test.config' } test_single { includeConfig 'conf/test_single.config' } @@ -344,8 +356,8 @@ manifest { description = """Amplicon sequencing analysis workflow using DADA2 and QIIME2""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '2.9.0' - doi = '10.5281/zenodo.1493841' + version = '2.10.0' + doi = '10.5281/zenodo.1493841,10.3389/fmicb.2020.550420' } // Load modules.config for DSL2 module specific options diff --git a/nextflow_schema.json b/nextflow_schema.json index 7cf1cd21..1bba874b 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -360,6 +360,7 @@ "gtdb=R06-RS202", "gtdb=R07-RS207", 
"gtdb=R08-RS214", + "gtdb=R09-RS220", "midori2-co1", "midori2-co1=gb250", "pr2", @@ -372,6 +373,7 @@ "sbdi-gtdb=R06-RS202-1", "sbdi-gtdb=R06-RS202-3", "sbdi-gtdb=R07-RS207-1", + "sbdi-gtdb=R08-RS214-1", "silva", "silva=132", "silva=138", @@ -631,12 +633,6 @@ "description": "Minimum rarefaction depth for diversity analysis. Any sample below that threshold will be removed.", "fa_icon": "fas fa-greater-than-equal" }, - "ancom_sample_min_count": { - "type": "integer", - "default": 1, - "description": "Minimum sample counts to retain a sample for ANCOM analysis. Any sample below that threshold will be removed.", - "fa_icon": "fas fa-greater-than-equal" - }, "tax_agglom_min": { "type": "integer", "default": 2, @@ -653,6 +649,59 @@ } } }, + "differential_abundance_analysis": { + "title": "Differential abundance analysis", + "type": "object", + "description": "", + "default": "", + "fa_icon": "fas fa-bacteria", + "properties": { + "ancom_sample_min_count": { + "type": "integer", + "default": 1, + "description": "Minimum sample counts to retain a sample for ANCOM analysis. Any sample below that threshold will be removed.", + "fa_icon": "fas fa-greater-than-equal" + }, + "ancom": { + "type": "boolean", + "description": "Perform differential abundance analysis with ANCOM", + "fa_icon": "fas fa-greater-than-equal" + }, + "ancombc": { + "type": "boolean", + "description": "Perform differential abundance analysis with ANCOMBC", + "help_text": "ANCOMBC will be performed on all suitable columns in the metadata sheet. Empty values will be removed, therefore it is possible to perform tests on subsets. The reference level will default to highest alphanumeric group (e.g. in alphabetical or numeric order, as applicable) within each metadata column. 
Formula for specific tests can be supplied with `--ancombc_formula`.", + "fa_icon": "fas fa-greater-than-equal" + }, + "ancombc_formula": { + "type": "string", + "description": "Formula to perform differential abundance analysis with ANCOMBC", + "help_text": "Comma separated list of model formula(s), e.g. \"treatment1,treatment2\". The reference level will default to highest alphanumeric group (e.g. in alphabetical or numeric order, as applicable) within each formula term. The reference level can be overwritten by `--ancombc_formula_reflvl`. Model formula should contain only independent terms in the sample metadata. These can be continuous variables or factors, and they can have interactions as in a typical R formula. Essentially, columns in the metadata sheet can be chosen that have no empty values, not only unique values, or not only identical values.\nFor example, \"treatment1+treatment2\" tests whether the data partitions based on \"treatment1\" and \"treatment2\" sample metadata. \"treatment1*treatment2\" test both of those effects as well as their interaction.\nMore examples can be found in the R documentation, https://cran.r-project.org/doc/manuals/r-release/R-intro.html#Formulae-for-statistical-models", + "fa_icon": "fas fa-greater-than-equal" + }, + "ancombc_formula_reflvl": { + "type": "string", + "description": "Reference level for `--ancombc_formula`", + "help_text": "This will only affect ANCOM-BC started by `--ancombc_formula`, but for all provided model formula, therefore it might be best to restrict `--ancombc_formula` to one formula. 
The syntax is as follows: 'column_name::column_value' or for multiple 'column_name1::column_value1 column_name2::column_value2'", + "fa_icon": "fas fa-greater-than-equal" + }, + "ancombc_effect_size": { + "type": "number", + "default": 1, + "minimum": 0, + "description": "Effect size threshold for differential abundance barplot for `--ancombc` and `--ancombc_formula`", + "fa_icon": "fas fa-greater-than-equal" + }, + "ancombc_significance": { + "type": "number", + "default": 0.05, + "minimum": 0, + "maximum": 1, + "description": "Significance threshold for differential abundance barplot for `--ancombc` and `--ancombc_formula`", + "fa_icon": "fas fa-greater-than-equal" + } + } + }, "pipeline_report": { "title": "Pipeline summary report", "type": "object", @@ -744,10 +793,6 @@ "type": "boolean", "description": "Skip alpha and beta diversity analysis" }, - "skip_ancom": { - "type": "boolean", - "description": "Skip differential abundance testing" - }, "skip_multiqc": { "type": "boolean", "description": "Skip MultiQC reporting" @@ -872,6 +917,13 @@ "description": "Validation of parameters in lenient more.", "hidden": true, "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)." 
+ }, + "pipelines_testdata_base_path": { + "type": "string", + "fa_icon": "far fa-check-circle", + "description": "Base URL or local path to location of pipeline test dataset files", + "default": "https://raw.githubusercontent.com/nf-core/test-datasets/", + "hidden": true } } }, @@ -993,6 +1045,9 @@ { "$ref": "#/definitions/downstream_analysis" }, + { + "$ref": "#/definitions/differential_abundance_analysis" + }, { "$ref": "#/definitions/pipeline_report" }, diff --git a/subworkflows/local/dada2_preprocessing.nf b/subworkflows/local/dada2_preprocessing.nf index 0e4889d0..dc5bceb8 100644 --- a/subworkflows/local/dada2_preprocessing.nf +++ b/subworkflows/local/dada2_preprocessing.nf @@ -90,7 +90,7 @@ workflow DADA2_PREPROCESSING { if (params.ignore_failed_filtering) { log.warn "The following samples had too few reads (<$params.min_read_counts) after quality filtering with DADA2:\n$samples\nIgnoring failed samples and continue!\n" } else { - error("The following samples had too few reads (<$params.min_read_counts) after quality filtering with DADA2:\n$samples\nPlease check whether the correct primer sequences for trimming were supplied. Ignore that samples using `--ignore_failed_filtering` or adjust the threshold with `--min_read_counts`.") + error("The following samples had too few reads (<$params.min_read_counts) after quality filtering with DADA2:\n$samples\nPlease check settings related to quality filtering such as `--max_ee` (increase), `--trunc_qmin` (increase) or `--trunclenf`/`--trunclenr` (decrease). 
Ignore that samples using `--ignore_failed_filtering` or adjust the threshold with `--min_read_counts`.") } } diff --git a/subworkflows/local/qiime2_ancom.nf b/subworkflows/local/qiime2_ancom.nf index fb2cd9cf..a2e28421 100644 --- a/subworkflows/local/qiime2_ancom.nf +++ b/subworkflows/local/qiime2_ancom.nf @@ -5,6 +5,10 @@ include { QIIME2_FILTERSAMPLES as QIIME2_FILTERSAMPLES_ANCOM } from '../../modules/local/qiime2_filtersamples' include { QIIME2_ANCOM_TAX } from '../../modules/local/qiime2_ancom_tax' include { QIIME2_ANCOM_ASV } from '../../modules/local/qiime2_ancom_asv' +include { QIIME2_ANCOMBC_ASV } from '../../modules/local/qiime2_ancombc_asv' +include { QIIME2_ANCOMBC_TAX } from '../../modules/local/qiime2_ancombc_tax' +include { QIIME2_ANCOMBC_ASV as ANCOMBC_FORMULA_ASV } from '../../modules/local/qiime2_ancombc_asv' +include { QIIME2_ANCOMBC_TAX as ANCOMBC_FORMULA_TAX } from '../../modules/local/qiime2_ancombc_tax' workflow QIIME2_ANCOM { take: @@ -14,10 +18,13 @@ workflow QIIME2_ANCOM { ch_tax tax_agglom_min tax_agglom_max + ancombc_formula main: ch_versions_qiime2_ancom = Channel.empty() + ch_taxlevel = Channel.of( tax_agglom_min..tax_agglom_max ) + //Filter ASV table to get rid of samples that have no metadata values ch_metadata .combine( ch_asv ) @@ -26,21 +33,62 @@ workflow QIIME2_ANCOM { QIIME2_FILTERSAMPLES_ANCOM ( ch_for_filtersamples ) ch_versions_qiime2_ancom = ch_versions_qiime2_ancom.mix(QIIME2_FILTERSAMPLES_ANCOM.out.versions) - //ANCOM on various taxonomic levels - ch_taxlevel = Channel.of( tax_agglom_min..tax_agglom_max ) - ch_metadata - .combine( QIIME2_FILTERSAMPLES_ANCOM.out.qza ) - .combine( ch_tax ) - .combine( ch_taxlevel ) - .set{ ch_for_ancom_tax } - QIIME2_ANCOM_TAX ( ch_for_ancom_tax ) - ch_versions_qiime2_ancom = ch_versions_qiime2_ancom.mix(QIIME2_ANCOM_TAX.out.versions) - QIIME2_ANCOM_TAX.out.ancom.subscribe { if ( it.baseName[0].toString().startsWith("WARNING") ) log.warn it.baseName[0].toString().replace("WARNING 
","QIIME2_ANCOM_TAX: ") } + if ( params.ancom ) { + //ANCOM on various taxonomic levels + ch_metadata + .combine( QIIME2_FILTERSAMPLES_ANCOM.out.qza ) + .combine( ch_tax ) + .combine( ch_taxlevel ) + .set{ ch_for_ancom_tax } + QIIME2_ANCOM_TAX ( ch_for_ancom_tax ) + ch_versions_qiime2_ancom = ch_versions_qiime2_ancom.mix(QIIME2_ANCOM_TAX.out.versions) + QIIME2_ANCOM_TAX.out.ancom.subscribe { if ( it.baseName[0].toString().startsWith("WARNING") ) log.warn it.baseName[0].toString().replace("WARNING ","QIIME2_ANCOM_TAX: ") } + + //ANCOM on ASVs + QIIME2_ANCOM_ASV ( ch_metadata.combine( QIIME2_FILTERSAMPLES_ANCOM.out.qza.flatten() ) ) + ch_versions_qiime2_ancom = ch_versions_qiime2_ancom.mix(QIIME2_ANCOM_ASV.out.versions) + } + + if ( params.ancombc ) { + //ANCOMBC on various taxonomic levels + ch_metadata + .combine( QIIME2_FILTERSAMPLES_ANCOM.out.qza ) + .combine( ch_tax ) + .combine( ch_taxlevel ) + .combine( Channel.fromList([""]) ) + .set{ ch_for_ancombc_tax } + QIIME2_ANCOMBC_TAX ( ch_for_ancombc_tax ) + ch_versions_qiime2_ancom = ch_versions_qiime2_ancom.mix(QIIME2_ANCOMBC_TAX.out.versions) + QIIME2_ANCOMBC_TAX.out.da_barplot.subscribe { if ( it.baseName[0].toString().startsWith("WARNING") ) log.warn it.baseName[0].toString().replace("WARNING ","QIIME2_ANCOMBC_TAX: ") } + + //ANCOMBC on ASVs + QIIME2_ANCOMBC_ASV ( ch_metadata.combine( QIIME2_FILTERSAMPLES_ANCOM.out.qza.flatten() ).combine( Channel.fromList([""]) ) ) + ch_versions_qiime2_ancom = ch_versions_qiime2_ancom.mix(QIIME2_ANCOMBC_ASV.out.versions) + } + + if ( ancombc_formula ) { + ch_ancombc_formula = Channel.fromList( ancombc_formula.toString().replace(" ","").tokenize(',') ) + + //ANCOMBC with ancombc_formula on various taxonomic levels + ch_taxlevel = Channel.of( tax_agglom_min..tax_agglom_max ) + ch_metadata + .combine( ch_asv ) + .combine( ch_tax ) + .combine( ch_taxlevel ) + .combine( ch_ancombc_formula ) + .set{ ch_for_ancombc_tax } + ANCOMBC_FORMULA_TAX ( ch_for_ancombc_tax ) + 
ch_versions_qiime2_ancom = ch_versions_qiime2_ancom.mix(ANCOMBC_FORMULA_TAX.out.versions) + ANCOMBC_FORMULA_TAX.out.da_barplot.subscribe { if ( it.baseName[0].toString().startsWith("WARNING") ) log.warn it.baseName[0].toString().replace("WARNING ","QIIME2_ANCOMBC_TAX: ") } - QIIME2_ANCOM_ASV ( ch_metadata.combine( QIIME2_FILTERSAMPLES_ANCOM.out.qza.flatten() ) ) - ch_versions_qiime2_ancom = ch_versions_qiime2_ancom.mix(QIIME2_ANCOM_ASV.out.versions) + //ANCOMBC with ancombc_formula on ASVs + ANCOMBC_FORMULA_ASV ( ch_metadata.combine( ch_asv ).combine( ch_ancombc_formula ) ) + ch_versions_qiime2_ancom = ch_versions_qiime2_ancom.mix(ANCOMBC_FORMULA_ASV.out.versions) + } emit: - ancom = QIIME2_ANCOM_ASV.out.ancom.mix(QIIME2_ANCOM_TAX.out.ancom) + ancom = params.ancom ? QIIME2_ANCOM_ASV.out.ancom.mix(QIIME2_ANCOM_TAX.out.ancom) : Channel.empty() + ancombc = params.ancombc ? QIIME2_ANCOMBC_ASV.out.da_barplot.mix(QIIME2_ANCOMBC_TAX.out.da_barplot) : Channel.empty() + ancombc_formula = ancombc_formula ? ANCOMBC_FORMULA_ASV.out.da_barplot.mix(ANCOMBC_FORMULA_TAX.out.da_barplot) : Channel.empty() versions = ch_versions_qiime2_ancom } diff --git a/subworkflows/local/utils_nfcore_ampliseq_pipeline/main.nf b/subworkflows/local/utils_nfcore_ampliseq_pipeline/main.nf index fcd1c5ae..321463ae 100644 --- a/subworkflows/local/utils_nfcore_ampliseq_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_ampliseq_pipeline/main.nf @@ -71,7 +71,6 @@ workflow PIPELINE_INITIALISATION { UTILS_NFCORE_PIPELINE ( nextflow_cli_args ) - // // Custom validation for pipeline parameters // @@ -132,6 +131,10 @@ workflow PIPELINE_COMPLETION { imNotification(summary_params, hook_url) } } + + workflow.onError { + log.error "Pipeline failed. 
Please refer to troubleshooting docs: https://nf-co.re/docs/usage/troubleshooting" + } } /* @@ -139,7 +142,6 @@ workflow PIPELINE_COMPLETION { FUNCTIONS ======================================================================================== */ - // // Check and validate pipeline parameters // @@ -235,7 +237,7 @@ def validateInputParameters() { error("Incompatible parameters: `--filter_ssu` cannot be used with `--skip_barrnap` because filtering for SSU's depends on barrnap.") } - String[] sbdi_compatible_databases = ["coidb","coidb=221216","gtdb","gtdb=R08-RS214","gtdb=R07-RS207","gtdb=R06-RS202","gtdb=R05-RS95","midori2-co1","midori2-co1=gb250","pr2","pr2=5.0.0","pr2=4.14.0","pr2=4.13.0","rdp","rdp=18","sbdi-gtdb","sbdi-gtdb=R07-RS207-1","silva","silva=138","silva=132","unite-fungi","unite-fungi=9.0","unite-fungi=8.3","unite-fungi=8.2","unite-alleuk","unite-alleuk=9.0","unite-alleuk=8.3","unite-alleuk=8.2"] + String[] sbdi_compatible_databases = ["coidb","coidb=221216","gtdb","gtdb=R09-RS220","gtdb=R08-RS214","gtdb=R07-RS207","gtdb=R06-RS202","gtdb=R05-RS95","midori2-co1","midori2-co1=gb250","pr2","pr2=5.0.0","pr2=4.14.0","pr2=4.13.0","rdp","rdp=18","sbdi-gtdb","sbdi-gtdb=R08-RS214-1","sbdi-gtdb=R07-RS207-1","silva","silva=138","silva=132","unite-fungi","unite-fungi=9.0","unite-fungi=8.3","unite-fungi=8.2","unite-alleuk","unite-alleuk=9.0","unite-alleuk=8.3","unite-alleuk=8.2"] if (params.sbdiexport){ if (params.sintax_ref_taxonomy ) { if (!Arrays.stream(sbdi_compatible_databases).anyMatch(entry -> params.sintax_ref_taxonomy.toString().equals(entry)) ) { @@ -412,8 +414,16 @@ def methodsDescriptionText(mqc_methods_yaml) { meta["manifest_map"] = workflow.manifest.toMap() // Pipeline DOI - meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : "" - meta["nodoi_text"] = meta.manifest_map.doi ? "": "
  • If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.
  • " + if (meta.manifest_map.doi) { + // Using a loop to handle multiple DOIs + // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers + // Removing ` ` since the manifest.doi is a string and not a proper list + def temp_doi_ref = "" + String[] manifest_doi = meta.manifest_map.doi.tokenize(",") + for (String doi_ref: manifest_doi) temp_doi_ref += "(doi: ${doi_ref.replace("https://doi.org/", "").replace(" ", "")}), " + meta["doi_text"] = temp_doi_ref.substring(0, temp_doi_ref.length() - 2) + } else meta["doi_text"] = "" + meta["nodoi_text"] = meta.manifest_map.doi ? "" : "
  • If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.
  • " // Tool references meta["tool_citations"] = "" diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf index a8b55d6f..14558c39 100644 --- a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf +++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -65,9 +65,15 @@ def checkProfileProvided(nextflow_cli_args) { // Citation string for pipeline // def workflowCitation() { + def temp_doi_ref = "" + String[] manifest_doi = workflow.manifest.doi.tokenize(",") + // Using a loop to handle multiple DOIs + // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers + // Removing ` ` since the manifest.doi is a string and not a proper list + for (String doi_ref: manifest_doi) temp_doi_ref += " https://doi.org/${doi_ref.replace('https://doi.org/', '').replace(' ', '')}\n" return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + "* The pipeline\n" + - " ${workflow.manifest.doi}\n\n" + + temp_doi_ref + "\n" + "* The nf-core framework\n" + " https://doi.org/10.1038/s41587-020-0439-x\n\n" + "* Software dependencies\n" + diff --git a/tests/pipeline/failed.nf.test b/tests/pipeline/failed.nf.test index 4c02b944..dd39b9b1 100644 --- a/tests/pipeline/failed.nf.test +++ b/tests/pipeline/failed.nf.test @@ -23,7 +23,7 @@ nextflow_pipeline { { assert new File("$outputDir/dada2/ASV_tax.user.tsv").exists() }, { assert new File("$outputDir/qiime2/abundance_tables/count_table_filter_stats.tsv").exists() }, { assert new File("$outputDir/qiime2/abundance_tables/feature-table.tsv").exists() }, - { assert new File("$outputDir/qiime2/ancom/Category-treatment1-ASV/ancom.tsv").exists() }, + { assert new File("$outputDir/qiime2/ancombc/da_barplot/Category-treatment1-ASV/index.html").exists() }, { assert new File("$outputDir/qiime2/barplot/index.html").exists() }, { assert new File("$outputDir/qiime2/alpha-rarefaction/index.html").exists() }, { assert new 
File("$outputDir/qiime2/diversity/alpha_diversity/shannon_vector/kruskal-wallis-pairwise-treatment1.csv").exists() }, diff --git a/tests/pipeline/test.nf.test b/tests/pipeline/test.nf.test index 59a1082d..2c215ad3 100644 --- a/tests/pipeline/test.nf.test +++ b/tests/pipeline/test.nf.test @@ -77,18 +77,23 @@ nextflow_pipeline { { assert new File("$outputDir/qiime2/taxonomy/GTGYCAGCMGCCGCGGTAA-GGACTACNVGGGTWTCTAAT-classifier.qza").exists() }, { assert new File("$outputDir/qiime2/taxonomy/ref_taxonomy.txt").exists() }, { assert new File("$outputDir/qiime2/taxonomy/taxonomy.tsv").exists() }, - { assert new File("$outputDir/qiime2/ancom/Category-badpairwise10-ASV/percent-abundances.tsv").exists() }, - { assert new File("$outputDir/qiime2/ancom/Category-badpairwise10-level-2/percent-abundances.tsv").exists() }, - { assert new File("$outputDir/qiime2/ancom/Category-badpairwise10-level-3/percent-abundances.tsv").exists() }, - { assert new File("$outputDir/qiime2/ancom/Category-badpairwise10-level-4/percent-abundances.tsv").exists() }, - { assert new File("$outputDir/qiime2/ancom/Category-mix8-ASV/percent-abundances.tsv").exists() }, - { assert new File("$outputDir/qiime2/ancom/Category-mix8-level-2/percent-abundances.tsv").exists() }, - { assert new File("$outputDir/qiime2/ancom/Category-mix8-level-3/percent-abundances.tsv").exists() }, - { assert new File("$outputDir/qiime2/ancom/Category-mix8-level-4/percent-abundances.tsv").exists() }, - { assert new File("$outputDir/qiime2/ancom/Category-treatment1-ASV/percent-abundances.tsv").exists() }, - { assert new File("$outputDir/qiime2/ancom/Category-treatment1-level-2/percent-abundances.tsv").exists() }, - { assert new File("$outputDir/qiime2/ancom/Category-treatment1-level-3/percent-abundances.tsv").exists() }, - { assert new File("$outputDir/qiime2/ancom/Category-treatment1-level-4/percent-abundances.tsv").exists() }, + { assert new File("$outputDir/qiime2/ancombc/da_barplot/Category-badpairwise10-ASV/index.html").exists() 
}, + { assert new File("$outputDir/qiime2/ancombc/da_barplot/Category-badpairwise10-level-2/index.html").exists() }, + { assert new File("$outputDir/qiime2/ancombc/da_barplot/Category-badpairwise10-level-3/index.html").exists() }, + { assert new File("$outputDir/qiime2/ancombc/da_barplot/Category-badpairwise10-level-4/index.html").exists() }, + { assert new File("$outputDir/qiime2/ancombc/da_barplot/Category-mix8-ASV/index.html").exists() }, + { assert new File("$outputDir/qiime2/ancombc/da_barplot/Category-mix8-level-2/index.html").exists() }, + { assert new File("$outputDir/qiime2/ancombc/da_barplot/Category-mix8-level-3/index.html").exists() }, + { assert new File("$outputDir/qiime2/ancombc/da_barplot/Category-mix8-level-4/index.html").exists() }, + { assert new File("$outputDir/qiime2/ancombc/da_barplot/Category-treatment1-ASV/index.html").exists() }, + { assert new File("$outputDir/qiime2/ancombc/da_barplot/Category-treatment1-level-2/index.html").exists() }, + { assert new File("$outputDir/qiime2/ancombc/da_barplot/Category-treatment1-level-3/index.html").exists() }, + { assert new File("$outputDir/qiime2/ancombc/da_barplot/Category-treatment1-level-4/index.html").exists() }, + { assert new File("$outputDir/qiime2/ancombc/differentials/Category-treatment1-level-4/index.html").exists() }, + { assert new File("$outputDir/qiime2/ancombc_formula/da_barplot/Category-treatment1-ASV/index.html").exists() }, + { assert new File("$outputDir/qiime2/ancombc_formula/differentials/Category-treatment1-ASV/index.html").exists() }, + { assert new File("$outputDir/qiime2/ancombc_formula/da_barplot/Category-treatment1-level-4/index.html").exists() }, + { assert new File("$outputDir/qiime2/ancombc_formula/differentials/Category-treatment1-level-4/index.html").exists() }, { assert snapshot(path("$outputDir/SBDI/dna.tsv"), path("$outputDir/SBDI/emof.tsv"), path("$outputDir/SBDI/event.tsv")).match("SBDI") }, diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 
d1d306c0..fa221126 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -772,7 +772,7 @@ workflow AMPLISEQ { } if (!params.skip_barplot) { - QIIME2_BARPLOT ( ch_metadata, ch_asv, ch_tax, '' ) + QIIME2_BARPLOT ( ch_metadata.ifEmpty([]), ch_asv, ch_tax, '' ) ch_versions = ch_versions.mix( QIIME2_BARPLOT.out.versions ) } @@ -788,7 +788,7 @@ workflow AMPLISEQ { ch_versions = ch_versions.mix( METADATA_PAIRWISE.out.versions ) ch_metacolumn_pairwise = ch_metacolumn_pairwise.splitCsv().flatten() ch_metacolumn_pairwise = ch_metacolumn_all.join(ch_metacolumn_pairwise) - } else if (!params.skip_ancom || !params.skip_diversity_indices) { + } else if (params.ancom || params.ancombc || !params.skip_diversity_indices) { METADATA_ALL ( ch_metadata ).category.set { ch_metacolumn_all } ch_versions = ch_versions.mix( METADATA_ALL.out.versions ) //return empty channel if no appropriate column was found @@ -820,15 +820,16 @@ workflow AMPLISEQ { ch_versions = ch_versions.mix( QIIME2_DIVERSITY.out.versions ) } - //Perform ANCOM tests - if ( !params.skip_ancom && params.metadata ) { + //Perform ANCOM and ANCOMBC tests + if ( ( params.ancom || params.ancombc || params.ancombc_formula ) && params.metadata ) { QIIME2_ANCOM ( ch_metadata, ch_asv, ch_metacolumn_all, ch_tax, tax_agglom_min, - tax_agglom_max + tax_agglom_max, + params.ancombc_formula ) ch_versions = ch_versions.mix( QIIME2_ANCOM.out.versions ) } @@ -888,23 +889,45 @@ workflow AMPLISEQ { // Collate and save software versions // softwareVersionsToYAML(ch_versions) - .collectFile(storeDir: "${params.outdir}/pipeline_info", name: 'software_versions.yml', sort: true, newLine: true) - .set { ch_collated_versions } + .collectFile( + storeDir: "${params.outdir}/pipeline_info", + name: 'software_versions.yml', + sort: true, + newLine: true + ).set { ch_collated_versions } // // MODULE: MultiQC // if (!params.skip_multiqc) { - ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) - 
ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config, checkIfExists: true) : Channel.empty() - ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath(params.multiqc_logo, checkIfExists: true) : Channel.empty() - summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") - ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) - ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) - ch_methods_description = Channel.value(methodsDescriptionText(ch_multiqc_custom_methods_description)) - ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) - ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml', sort: false)) + ch_multiqc_config = Channel.fromPath( + "$projectDir/assets/multiqc_config.yml", checkIfExists: true) + ch_multiqc_custom_config = params.multiqc_config ? + Channel.fromPath(params.multiqc_config, checkIfExists: true) : + Channel.empty() + ch_multiqc_logo = params.multiqc_logo ? + Channel.fromPath(params.multiqc_logo, checkIfExists: true) : + Channel.empty() + + summary_params = paramsSummaryMap( + workflow, parameters_schema: "nextflow_schema.json") + ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) + + ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
+ file(params.multiqc_methods_description, checkIfExists: true) : + file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) + ch_methods_description = Channel.value( + methodsDescriptionText(ch_multiqc_custom_methods_description)) + + ch_multiqc_files = ch_multiqc_files.mix( + ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) + ch_multiqc_files = ch_multiqc_files.mix( + ch_methods_description.collectFile( + name: 'methods_description_mqc.yaml', + sort: true + ) + ) MULTIQC ( ch_multiqc_files.collect(), @@ -981,7 +1004,9 @@ workflow AMPLISEQ { run_qiime2 && !params.skip_diversity_indices && params.metadata ? QIIME2_DIVERSITY.out.alpha.collect().ifEmpty( [] ) : [], run_qiime2 && !params.skip_diversity_indices && params.metadata ? QIIME2_DIVERSITY.out.beta.collect().ifEmpty( [] ) : [], run_qiime2 && !params.skip_diversity_indices && params.metadata ? QIIME2_DIVERSITY.out.adonis.collect().ifEmpty( [] ) : [], - run_qiime2 && !params.skip_ancom && params.metadata ? QIIME2_ANCOM.out.ancom.collect().ifEmpty( [] ) : [], + run_qiime2 && params.ancom && params.metadata ? QIIME2_ANCOM.out.ancom.collect().ifEmpty( [] ) : [], + run_qiime2 && params.ancombc && params.metadata ? QIIME2_ANCOM.out.ancombc.collect().ifEmpty( [] ) : [], + run_qiime2 && params.ancombc_formula && params.metadata ? QIIME2_ANCOM.out.ancombc_formula.collect().ifEmpty( [] ) : [], params.picrust ? PICRUST.out.pathways.ifEmpty( [] ) : [], params.sbdiexport ? SBDIEXPORT.out.sbditables.mix(SBDIEXPORTREANNOTATE.out.sbdiannottables).collect().ifEmpty( [] ) : [], !params.skip_taxonomy ? PHYLOSEQ_WORKFLOW.out.rds.map{info,rds -> [rds]}.collect().ifEmpty( [] ) : []