diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index ea27a584..4ecfbfe3 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -2,6 +2,7 @@
     "name": "nfcore",
     "image": "nfcore/gitpod:latest",
     "remoteUser": "gitpod",
+    "runArgs": ["--privileged"],
 
     // Configure tool-specific properties.
     "customizations": {
diff --git a/.editorconfig b/.editorconfig
index b78de6e6..b6b31907 100644
--- a/.editorconfig
+++ b/.editorconfig
@@ -8,7 +8,7 @@ trim_trailing_whitespace = true
 indent_size = 4
 indent_style = space
 
-[*.{md,yml,yaml,html,css,scss,js,cff}]
+[*.{md,yml,yaml,html,css,scss,js}]
 indent_size = 2
 
 # These files are edited and tested upstream in nf-core/modules
diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md
index 5ef6a8e1..c71d079f 100644
--- a/.github/CONTRIBUTING.md
+++ b/.github/CONTRIBUTING.md
@@ -9,7 +9,9 @@ Please use the pre-filled template to save time.
 
 However, don't be put off by this template - other more general issues and suggestions are welcome! Contributions to the code are even more welcome ;)
 
-> If you need help using or modifying nf-core/differentialabundance then the best place to ask is on the nf-core Slack [#differentialabundance](https://nfcore.slack.com/channels/differentialabundance) channel ([join our Slack here](https://nf-co.re/join/slack)).
+:::info
+If you need help using or modifying nf-core/differentialabundance then the best place to ask is on the nf-core Slack [#differentialabundance](https://nfcore.slack.com/channels/differentialabundance) channel ([join our Slack here](https://nf-co.re/join/slack)).
+:::
 
 ## Contribution workflow
 
@@ -116,4 +118,3 @@ To get started:
 Devcontainer specs:
 
 - [DevContainer config](.devcontainer/devcontainer.json)
-- [Dockerfile](.devcontainer/Dockerfile)
diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml
index 2a6d4d38..39b210da 100644
--- a/.github/ISSUE_TEMPLATE/bug_report.yml
+++ b/.github/ISSUE_TEMPLATE/bug_report.yml
@@ -42,9 +42,9 @@ body:
     attributes:
       label: System information
       description: |
-        * Nextflow version _(eg. 22.10.1)_
+        * Nextflow version _(eg. 23.04.0)_
         * Hardware _(eg. HPC, Desktop, Cloud)_
         * Executor _(eg. slurm, local, awsbatch)_
-        * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter or Charliecloud)_
+        * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter, Charliecloud, or Apptainer)_
         * OS _(eg. CentOS Linux, macOS, Linux Mint)_
         * Version of nf-core/differentialabundance _(eg. 1.1, 1.5, 1.8.2)_
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index 5ed37d55..1f6612e5 100644
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -15,7 +15,8 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/diff
 
 - [ ] This comment contains a description of changes (with reason).
 - [ ] If you've fixed a bug or added code that should be tested, add tests!
-- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/differentialabundance/tree/master/.github/CONTRIBUTING.md)- [ ] If necessary, also make a PR on the nf-core/differentialabundance _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository.
+- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/differentialabundance/tree/master/.github/CONTRIBUTING.md)
+- [ ] If necessary, also make a PR on the nf-core/differentialabundance _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository.
 - [ ] Make sure your code lints (`nf-core lint`).
 - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir <OUTDIR>`).
 - [ ] Usage Documentation in `docs/usage.md` is updated.
diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml
index 7a62d152..8d063f29 100644
--- a/.github/workflows/awsfulltest.yml
+++ b/.github/workflows/awsfulltest.yml
@@ -14,18 +14,23 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Launch workflow via tower
-        uses: nf-core/tower-action@v3
+        uses: seqeralabs/action-tower-launch@v2
         with:
           workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }}
           access_token: ${{ secrets.TOWER_ACCESS_TOKEN }}
           compute_env: ${{ secrets.TOWER_COMPUTE_ENV }}
+          revision: ${{ github.sha }}
           workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/differentialabundance/work-${{ github.sha }}
           parameters: |
             {
+              "hook_url": "${{ secrets.MEGATESTS_ALERTS_SLACK_HOOK_URL }}",
               "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/differentialabundance/results-${{ github.sha }}"
             }
-          profiles: test_full,aws_tower
+          profiles: test_full
+
       - uses: actions/upload-artifact@v3
         with:
           name: Tower debug log file
-          path: tower_action_*.log
+          path: |
+            tower_action_*.log
+            tower_action_*.json
diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml
index b914c8ae..1bd534bd 100644
--- a/.github/workflows/awstest.yml
+++ b/.github/workflows/awstest.yml
@@ -12,18 +12,22 @@ jobs:
     steps:
       # Launch workflow using Tower CLI tool action
       - name: Launch workflow via tower
-        uses: nf-core/tower-action@v3
+        uses: seqeralabs/action-tower-launch@v2
         with:
           workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }}
           access_token: ${{ secrets.TOWER_ACCESS_TOKEN }}
           compute_env: ${{ secrets.TOWER_COMPUTE_ENV }}
+          revision: ${{ github.sha }}
           workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/differentialabundance/work-${{ github.sha }}
           parameters: |
             {
               "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/differentialabundance/results-test-${{ github.sha }}"
             }
-          profiles: test,aws_tower
+          profiles: test
+
       - uses: actions/upload-artifact@v3
         with:
           name: Tower debug log file
-          path: tower_action_*.log
+          path: |
+            tower_action_*.log
+            tower_action_*.json
diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml
index 1e9d91ad..3bf724ea 100644
--- a/.github/workflows/branch.yml
+++ b/.github/workflows/branch.yml
@@ -13,7 +13,7 @@ jobs:
       - name: Check PRs
         if: github.repository == 'nf-core/differentialabundance'
         run: |
-          { [[ ${{github.event.pull_request.head.repo.full_name }} == nf-core/differentialabundance ]] && [[ $GITHUB_HEAD_REF = "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]]
+          { [[ ${{github.event.pull_request.head.repo.full_name }} == nf-core/differentialabundance ]] && [[ $GITHUB_HEAD_REF == "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]]
 
       # If the above check failed, post a comment on the PR explaining the failure
       # NOTE - this doesn't currently work if the PR is coming from a fork, due to limitations in GitHub actions secrets
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 771f018e..ece2dda8 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -24,12 +24,14 @@ jobs:
     strategy:
       matrix:
        NXF_VER:
-          - "22.10.1"
+          - "23.04.0"
          - "latest-everything"
        profile:
          - "test"
          - "test_nogtf"
          - "test_affy"
+          - "test_maxquant"
+          - "test_soft"
     steps:
       - name: Check out pipeline code
         uses: actions/checkout@v3
diff --git a/.github/workflows/clean-up.yml b/.github/workflows/clean-up.yml
new file mode 100644
index 00000000..694e90ec
--- /dev/null
+++ b/.github/workflows/clean-up.yml
@@ -0,0 +1,24 @@
+name: "Close user-tagged issues and PRs"
+on:
+  schedule:
+    - cron: "0 0 * * 0" # Once a week
+
+jobs:
+  clean-up:
+    runs-on: ubuntu-latest
+    permissions:
+      issues: write
+      pull-requests: write
+    steps:
+      - uses: actions/stale@v7
+        with:
+          stale-issue-message: "This issue has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment otherwise this issue will be closed in 20 days."
+          stale-pr-message: "This PR has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment if it is still useful."
+          close-issue-message: "This issue was closed because it has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor and then staled for 20 days with no activity."
+          days-before-stale: 30
+          days-before-close: 20
+          days-before-pr-close: -1
+          any-of-labels: "awaiting-changes,awaiting-feedback"
+          exempt-issue-labels: "WIP"
+          exempt-pr-labels: "WIP"
+          repo-token: "${{ secrets.GITHUB_TOKEN }}"
diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml
index 858d622e..b8bdd214 100644
--- a/.github/workflows/linting.yml
+++ b/.github/workflows/linting.yml
@@ -78,7 +78,7 @@ jobs:
 
       - uses: actions/setup-python@v4
         with:
-          python-version: "3.7"
+          python-version: "3.11"
          architecture: "x64"
 
       - name: Install dependencies
diff --git a/.github/workflows/release-announcments.yml b/.github/workflows/release-announcments.yml
new file mode 100644
index 00000000..6ad33927
--- /dev/null
+++ b/.github/workflows/release-announcments.yml
@@ -0,0 +1,68 @@
+name: release-announcements
+# Automatic release toot and tweet announcements
+on:
+  release:
+    types: [published]
+  workflow_dispatch:
+
+jobs:
+  toot:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: rzr/fediverse-action@master
+        with:
+          access-token: ${{ secrets.MASTODON_ACCESS_TOKEN }}
+          host: "mstdn.science" # custom host if not "mastodon.social" (default)
+          # GitHub event payload
+          # https://docs.github.com/en/developers/webhooks-and-events/webhooks/webhook-events-and-payloads#release
+          message: |
+            Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}!
+
+            Please see the changelog: ${{ github.event.release.html_url }}
+
+  send-tweet:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/setup-python@v4
+        with:
+          python-version: "3.10"
+      - name: Install dependencies
+        run: pip install tweepy==4.14.0
+      - name: Send tweet
+        shell: python
+        run: |
+          import os
+          import tweepy
+
+          client = tweepy.Client(
+              access_token=os.getenv("TWITTER_ACCESS_TOKEN"),
+              access_token_secret=os.getenv("TWITTER_ACCESS_TOKEN_SECRET"),
+              consumer_key=os.getenv("TWITTER_CONSUMER_KEY"),
+              consumer_secret=os.getenv("TWITTER_CONSUMER_SECRET"),
+          )
+          tweet = os.getenv("TWEET")
+          client.create_tweet(text=tweet)
+        env:
+          TWEET: |
+            Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}!
+
+            Please see the changelog: ${{ github.event.release.html_url }}
+          TWITTER_CONSUMER_KEY: ${{ secrets.TWITTER_CONSUMER_KEY }}
+          TWITTER_CONSUMER_SECRET: ${{ secrets.TWITTER_CONSUMER_SECRET }}
+          TWITTER_ACCESS_TOKEN: ${{ secrets.TWITTER_ACCESS_TOKEN }}
+          TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }}
+
+  bsky-post:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: zentered/bluesky-post-action@v0.0.2
+        with:
+          post: |
+            Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}!
+
+            Please see the changelog: ${{ github.event.release.html_url }}
+        env:
+          BSKY_IDENTIFIER: ${{ secrets.BSKY_IDENTIFIER }}
+          BSKY_PASSWORD: ${{ secrets.BSKY_PASSWORD }}
+  #
diff --git a/.gitpod.yml b/.gitpod.yml
index 85d95ecc..25488dcc 100644
--- a/.gitpod.yml
+++ b/.gitpod.yml
@@ -1,4 +1,9 @@
 image: nfcore/gitpod:latest
+tasks:
+  - name: Update Nextflow and setup pre-commit
+    command: |
+      pre-commit install --install-hooks
+      nextflow self-update
 
 vscode:
   extensions: # based on nf-core.nf-core-extensionpack
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 00000000..0c31cdb9
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,5 @@
+repos:
+  - repo: https://github.com/pre-commit/mirrors-prettier
+    rev: "v2.7.1"
+    hooks:
+      - id: prettier
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5ce69e42..991a6b02 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,43 @@
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## v1.3.0 - 2023-10-25
+
+### `Added`
+
+- [[#124](https://github.com/nf-core/differentialabundance/pull/124)] - Template update for nf-core/tools v2.8 ([@pinin4fjords](https://github.com/pinin4fjords), review by [@jasmezz](https://github.com/jasmezz))
+- [[#129](https://github.com/nf-core/differentialabundance/pull/129)] - Module updates to fit with recent registry changes ([@pinin4fjords](https://github.com/pinin4fjords), review by [@maxulysse](https://github.com/maxulysse), [@adamrtalbot](https://github.com/adamrtalbot))
+- [[#130](https://github.com/nf-core/differentialabundance/pull/130)] - Document reasons for lack of differential expression ([@pinin4fjords](https://github.com/pinin4fjords), review by [@jfy133](https://github.com/jfy133))
+- [[#131](https://github.com/nf-core/differentialabundance/pull/131)] - Improve gtf to table configurability ([@pinin4fjords](https://github.com/pinin4fjords), review by [@WackerO](https://github.com/WackerO))
+- [[#136](https://github.com/nf-core/differentialabundance/pull/136)] - Added support for non-Affymetrix arrays via automatic download of SOFT matrices in GEO ([@azedinez](https://github.com/azedinez), review by [@pinin4fjords](https://github.com/pinin4fjords))
+- [[#137](https://github.com/nf-core/differentialabundance/pull/137)] - Add `--sizefactors_from_controls` and `--gene_id_col` for DESeq2 module to modules.config ([@WackerO](https://github.com/WackerO), review by [@pinin4fjords](https://github.com/pinin4fjords))
+- [[#145](https://github.com/nf-core/differentialabundance/pull/145)] - Template update for nf-core/tools v2.9 ([@nf-core-bot](https://github.com/nf-core-bot), review by [@pinin4fjords](https://github.com/pinin4fjords), [@WackerO](https://github.com/WackerO))
+- [[#147](https://github.com/nf-core/differentialabundance/pull/147)] - Add Maxquant analysis module ([@WackerO](https://github.com/WackerO), review by [@pinin4fjords](https://github.com/pinin4fjords))
+- [[#166](https://github.com/nf-core/differentialabundance/issues/166)] - Output a parameter-resolved R Markdown document, as well as rendered HTML ([@pinin4fjords](https://github.com/pinin4fjords), review by [@WackerO](https://github.com/WackerO))
+- [[#119](https://github.com/nf-core/differentialabundance/issues/119)] - Document sample sheet for Affy arrays ([@pinin4fjords](https://github.com/pinin4fjords), review by [@WackerO](https://github.com/WackerO))
+- [[#165](https://github.com/nf-core/differentialabundance/issues/165)] - Update subway map ([@pinin4fjords](https://github.com/pinin4fjords), review by [@FriederikeHanssen](https://github.com/FriederikeHanssen))
+- [[#135](https://github.com/nf-core/differentialabundance/issues/135)] - workaround OPENBLAS using all cores problem ([@pinin4fjords](https://github.com/pinin4fjords), review by [@sateeshperi](https://github.com/sateeshperi))
+- [[#176](https://github.com/nf-core/differentialabundance/pull/176)] - bump shinyngs ([@pinin4fjords](https://github.com/pinin4fjords), review by )
+
+### `Fixed`
+
+- [[#116](https://github.com/nf-core/differentialabundance/issues/116)] - Skip outlier detection with low replication ([@pinin4fjords](https://github.com/pinin4fjords), review by [@nvnieuwk](https://github.com/nvnieuwk))
+- [[#122](https://github.com/nf-core/differentialabundance/pull/126)] - Add spaces to satisfy nf-core download for singularity ([@pinin4fjords](https://github.com/pinin4fjords), review by [@WackerO](https://github.com/WackerO))
+- [[#127](https://github.com/nf-core/differentialabundance/issues/127)] - [Bug] Can't pass samplesheet with -c file.config , or -params-file params.yml or directly with --input samplesheet.csv ([@ctuni](https://github.com/ctuni), review by [@pinin4fjords](https://github.com/pinin4fjords))
+- [[#138](https://github.com/nf-core/differentialabundance/issues/138)] - Fix bugs with --control_features and --sizefactors_from_controls ([@WackerO](https://github.com/WackerO), review by [@pinin4fjords](https://github.com/pinin4fjords))
+- [[#133](https://github.com/nf-core/differentialabundance/issues/133)] - Sample exclusion options fail due to contrast-wise normalisation ([@pinin4fjords](https://github.com/pinin4fjords), review by [@WackerO](https://github.com/WackerO))
+- [[#160](https://github.com/nf-core/differentialabundance/issues/160)] - Fix merge conflicts for Template update 2.10 by nf-core-bot ([@WackerO](https://github.com/WackerO), review by [@pinin4fjords](https://github.com/pinin4fjords))
+- [[#164](https://github.com/nf-core/differentialabundance/pull/164)] - Rlog + other small fixes ([@pinin4fjords](https://github.com/pinin4fjords), review by [@drpatelh](https://github.com/drpatelh))
+- [[#174](https://github.com/nf-core/differentialabundance/pull/174)] - Fix metro map ([@pinin4fjords](https://github.com/pinin4fjords), review by [@WackerO](https://github.com/WackerO))
+
+### `Changed`
+
+- [[#179](https://github.com/nf-core/differentialabundance/issues/179)] - Removed shiny app error message for proteus runs ([@WackerO](https://github.com/WackerO), review by [@pinin4fjords](https://github.com/pinin4fjords))
+- [[#159](https://github.com/nf-core/differentialabundance/issues/159)] - CUSTOM/MATRIXFILTER module update ([@WackerO](https://github.com/WackerO), review by [@suzannejin](https://github.com/suzannejin))
+- [[#154](https://github.com/nf-core/differentialabundance/issues/154)] - RMARKDOWNNOTEBOOK env update ([@WackerO](https://github.com/WackerO), review by [@pinin4fjords](https://github.com/pinin4fjords))
+- [[#151](https://github.com/nf-core/differentialabundance/issues/151)] - Module update ([@WackerO](https://github.com/WackerO), review by [@pinin4fjords](https://github.com/pinin4fjords))
+- [[#147](https://github.com/nf-core/differentialabundance/pull/147)] - RMARKDOWNNOTEBOOK env update, SHINYNGS and CUSTOM update ([@WackerO](https://github.com/WackerO), review by [@pinin4fjords](https://github.com/pinin4fjords))
+
 ## v1.2.0 - 2023-04-19
 
 ### `Added`
@@ -11,6 +48,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - [[#105](https://github.com/nf-core/differentialabundance/pull/105)] - Enabled multiple GMT/GMX files for GSEA ([@WackerO](https://github.com/WackerO), reported by [@grst](https://github.com/grst), review by [@pinin4fjords](https://github.com/pinin4fjords))
 - [[#108](https://github.com/nf-core/differentialabundance/issues/108)] - Add shiny app generation (starting feature set) ([@pinin4fjords](https://github.com/pinin4fjords), review by [@WackerO](https://github.com/WackerO))
 - [[#110](https://github.com/nf-core/differentialabundance/pull/110)] - Add shiny app outputs to tower.yml ([@pinin4fjords](https://github.com/pinin4fjords), review by [@WackerO](https://github.com/WackerO), [@maxulysse](https://github.com/maxulysse))
+- [[#149](https://github.com/nf-core/differentialabundance/pull/149)] - Update README.md - add ref to nf-core/rnaseq and Affymetrix ([@smoe](https://github.com/smoe), review by [@pinin4fjords](https://github.com/pinin4fjords))
 
 ### `Fixed`
diff --git a/CITATIONS.md b/CITATIONS.md
index 5896c1f9..200368b1 100644
--- a/CITATIONS.md
+++ b/CITATIONS.md
@@ -16,13 +16,9 @@
 
 ## R packages
 
-- [R](https://www.R-project.org/)
+- [affy](https://pubmed.ncbi.nlm.nih.gov/14960456/)
 
-  > R Core Team (2017). R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria.
-
-- [shinyngs](https://github.com/pinin4fjords/shinyngs)
-
-  > Jonathan R Manning (2022). Shiny apps for NGS etc based on reusable components created using Shiny modules. Computer software. Vers. 1.5.3. Jonathan Manning, Dec. 2022. Web.
+  > Gautier L, Cope L, Bolstad BM, Irizarry RA. Affy--analysis of affymetrix genechip data at the probe level. Bioinformatics. 2004;20(3):307-315.
 
 - [DESeq2](https://pubmed.ncbi.nlm.nih.gov/25516281/)
 
@@ -32,26 +28,46 @@
 
   > H. Wickham (2016). ggplot2: Elegant Graphics for Data Analysis. Springer-Verlag New York.
 
-- [plotly](https://plotly.com/r/)
+- [GEOQuery](https://pubmed.ncbi.nlm.nih.gov/17496320/)
 
-  > C. Sievert (2020). Interactive Web-Based Data Visualization with R, plotly, and shiny. Chapman and Hall/CRC Florida.
+  > Davis S, Meltzer PS. Geoquery: a bridge between the gene expression omnibus (Geo) and bioconductor. Bioinformatics. 2007;23(14):1846-1847.
+
+- [Limma](https://pubmed.ncbi.nlm.nih.gov/25605792/)
+
+  > Ritchie ME, Phipson B, Wu D, et al. Limma powers differential expression analyses for rna-sequencing and microarray studies. Nucleic Acids Res. 2015;43(7):e47.
 
 - [optparse](https://CRAN.R-project.org/package=optparse)
 
   > Trevor L Davis (2018). optparse: Command Line Option Parser.
 
-- [RColorBrewer](https://CRAN.R-project.org/package=RColorBrewer)
+- [plotly](https://plotly.com/r/)
 
-  > Erich Neuwirth (2014). RColorBrewer: ColorBrewer Palettes.
+  > C. Sievert (2020). Interactive Web-Based Data Visualization with R, plotly, and shiny. Chapman and Hall/CRC Florida.
 
-- [SummarizedExperiment](https://bioconductor.org/packages/release/bioc/html/SummarizedExperiment.html)
+- [Proteus](https://doi.org/10.1101/416511)
 
-  > Morgan M, Obenchain V, Hester J and Pagès H (2020). SummarizedExperiment: SummarizedExperiment container.
+  > Gierlinski M, Gastaldello F, Cole C, Barton GJ. Proteus: An R Package for Downstream Analysis of Maxquant Output. Bioinformatics; 2018.
+
+- [R](https://www.R-project.org/)
+
+  > R Core Team (2017). R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria.
+
+- [RColorBrewer](https://CRAN.R-project.org/package=RColorBrewer)
+
+  > Erich Neuwirth (2014). RColorBrewer: ColorBrewer Palettes.
 
 - [RMarkdown](https://rmarkdown.rstudio.com)
 
   > JJ Allaire and Yihui Xie and Jonathan McPherson and Javier Luraschi and Kevin Ushey and Aron Atkins and Hadley Wickham and Joe Cheng and Winston Chang and Richard Iannone (2022). rmarkdown: Dynamic Documents for R.
 
+- [shinyngs](https://github.com/pinin4fjords/shinyngs)
+
+  > Jonathan R Manning (2022). Shiny apps for NGS etc based on reusable components created using Shiny modules. Computer software. Vers. 1.5.3. Jonathan Manning, Dec. 2022. Web.
+
+- [SummarizedExperiment](https://bioconductor.org/packages/release/bioc/html/SummarizedExperiment.html)
+
+  > Morgan M, Obenchain V, Hester J and Pagès H (2020). SummarizedExperiment: SummarizedExperiment container.
+
 ## Software packaging/containerisation tools
 
 - [Anaconda](https://anaconda.com)
@@ -68,5 +84,8 @@
 
 - [Docker](https://dl.acm.org/doi/10.5555/2600239.2600241)
 
+  > Merkel, D. (2014). Docker: lightweight linux containers for consistent development and deployment. Linux Journal, 2014(239), 2. doi: 10.5555/2600239.2600241.
+
 - [Singularity](https://pubmed.ncbi.nlm.nih.gov/28494014/)
+
+  > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675.
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
index f4fd052f..c089ec78 100644
--- a/CODE_OF_CONDUCT.md
+++ b/CODE_OF_CONDUCT.md
@@ -1,18 +1,20 @@
-# Code of Conduct at nf-core (v1.0)
+# Code of Conduct at nf-core (v1.4)
 
 ## Our Pledge
 
-In the interest of fostering an open, collaborative, and welcoming environment, we as contributors and maintainers of nf-core, pledge to making participation in our projects and community a harassment-free experience for everyone, regardless of:
+In the interest of fostering an open, collaborative, and welcoming environment, we as contributors and maintainers of nf-core pledge to making participation in our projects and community a harassment-free experience for everyone, regardless of:
 
 - Age
+- Ability
 - Body size
+- Caste
 - Familial status
 - Gender identity and expression
 - Geographical location
 - Level of experience
 - Nationality and national origins
 - Native language
-- Physical and neurological ability
+- Neurodiversity
 - Race or ethnicity
 - Religion
 - Sexual identity and orientation
@@ -22,80 +24,133 @@ Please note that the list above is alphabetised and is therefore not ranked in a
 
 ## Preamble
 
-> Note: This Code of Conduct (CoC) has been drafted by the nf-core Safety Officer and been edited after input from members of the nf-core team and others. "We", in this document, refers to the Safety Officer and members of the nf-core core team, both of whom are deemed to be members of the nf-core community and are therefore required to abide by this Code of Conduct. This document will amended periodically to keep it up-to-date, and in case of any dispute, the most current version will apply.
+:::note
+This Code of Conduct (CoC) has been drafted by Renuka Kudva, Cris Tuñí, and Michael Heuer, with input from the nf-core Core Team and Susanna Marquez from the nf-core community. "We", in this document, refers to the Safety Officers and members of the nf-core Core Team, both of whom are deemed to be members of the nf-core community and are therefore required to abide by this Code of Conduct. This document will be amended periodically to keep it up-to-date. In case of any dispute, the most current version will apply.
+:::
 
-An up-to-date list of members of the nf-core core team can be found [here](https://nf-co.re/about). Our current safety officer is Renuka Kudva.
+An up-to-date list of members of the nf-core core team can be found [here](https://nf-co.re/about).
+
+Our Safety Officers are Saba Nafees, Cris Tuñí, and Michael Heuer.
 
 nf-core is a young and growing community that welcomes contributions from anyone with a shared vision for [Open Science Policies](https://www.fosteropenscience.eu/taxonomy/term/8). Open science policies encompass inclusive behaviours and we strive to build and maintain a safe and inclusive environment for all individuals.
 
-We have therefore adopted this code of conduct (CoC), which we require all members of our community and attendees in nf-core events to adhere to in all our workspaces at all times. Workspaces include but are not limited to Slack, meetings on Zoom, Jitsi, YouTube live etc.
+We have therefore adopted this CoC, which we require all members of our community and attendees of nf-core events to adhere to in all our workspaces at all times. Workspaces include, but are not limited to, Slack, meetings on Zoom, gather.town, YouTube live etc.
 
-Our CoC will be strictly enforced and the nf-core team reserve the right to exclude participants who do not comply with our guidelines from our workspaces and future nf-core activities.
+Our CoC will be strictly enforced and the nf-core team reserves the right to exclude participants who do not comply with our guidelines from our workspaces and future nf-core activities.
 
-We ask all members of our community to help maintain a supportive and productive workspace and to avoid behaviours that can make individuals feel unsafe or unwelcome. Please help us maintain and uphold this CoC.
+We ask all members of our community to help maintain supportive and productive workspaces and to avoid behaviours that can make individuals feel unsafe or unwelcome. Please help us maintain and uphold this CoC.
 
-Questions, concerns or ideas on what we can include? Contact safety [at] nf-co [dot] re
+Questions, concerns, or ideas on what we can include? Contact members of the Safety Team on Slack or email safety [at] nf-co [dot] re.
 
 ## Our Responsibilities
 
-The safety officer is responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behaviour.
+Members of the Safety Team (the Safety Officers) are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behaviour.
 
-The safety officer in consultation with the nf-core core team have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful.
+The Safety Team, in consultation with the nf-core core team, have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this CoC, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful.
 
-Members of the core team or the safety officer who violate the CoC will be required to recuse themselves pending investigation. They will not have access to any reports of the violations and be subject to the same actions as others in violation of the CoC.
+Members of the core team or the Safety Team who violate the CoC will be required to recuse themselves pending investigation. They will not have access to any reports of the violations and will be subject to the same actions as others in violation of the CoC.
 
-## When are where does this Code of Conduct apply?
+## When and where does this Code of Conduct apply?
 
-Participation in the nf-core community is contingent on following these guidelines in all our workspaces and events. This includes but is not limited to the following listed alphabetically and therefore in no order of preference:
+Participation in the nf-core community is contingent on following these guidelines in all our workspaces and events, such as hackathons, workshops, bytesize, and collaborative workspaces on gather.town. These guidelines include, but are not limited to, the following (listed alphabetically and therefore in no order of preference):
 
 - Communicating with an official project email address.
 - Communicating with community members within the nf-core Slack channel.
 - Participating in hackathons organised by nf-core (both online and in-person events).
-- Participating in collaborative work on GitHub, Google Suite, community calls, mentorship meetings, email correspondence.
-- Participating in workshops, training, and seminar series organised by nf-core (both online and in-person events). This applies to events hosted on web-based platforms such as Zoom, Jitsi, YouTube live etc.
+- Participating in collaborative work on GitHub, Google Suite, community calls, mentorship meetings, email correspondence, and on the nf-core gather.town workspace.
+- Participating in workshops, training, and seminar series organised by nf-core (both online and in-person events). This applies to events hosted on web-based platforms such as Zoom, gather.town, Jitsi, YouTube live etc.
 - Representing nf-core on social media. This includes both official and personal accounts.
 
 ## nf-core cares 😊
 
-nf-core's CoC and expectations of respectful behaviours for all participants (including organisers and the nf-core team) include but are not limited to the following (listed in alphabetical order):
+nf-core's CoC and expectations of respectful behaviours for all participants (including organisers and the nf-core team) include, but are not limited to, the following (listed in alphabetical order):
 
 - Ask for consent before sharing another community member’s personal information (including photographs) on social media.
 - Be respectful of differing viewpoints and experiences. We are all here to learn from one another and a difference in opinion can present a good learning opportunity.
-- Celebrate your accomplishments at events! (Get creative with your use of emojis 🎉 🥳 💯 🙌 !)
+- Celebrate your accomplishments! (Get creative with your use of emojis 🎉 🥳 💯 🙌 !)
 - Demonstrate empathy towards other community members. (We don’t all have the same amount of time to dedicate to nf-core. If tasks are pending, don’t hesitate to gently remind members of your team. If you are leading a task, ask for help if you feel overwhelmed.)
 - Engage with and enquire after others. (This is especially important given the geographically remote nature of the nf-core community, so let’s do this the best we can)
 - Focus on what is best for the team and the community. (When in doubt, ask)
-- Graciously accept constructive criticism, yet be unafraid to question, deliberate, and learn.
+- Accept feedback, yet be unafraid to question, deliberate, and learn.
 - Introduce yourself to members of the community. (We’ve all been outsiders and we know that talking to strangers can be hard for some, but remember we’re interested in getting to know you and your visions for open science!)
-- Show appreciation and **provide clear feedback**. (This is especially important because we don’t see each other in person and it can be harder to interpret subtleties. Also remember that not everyone understands a certain language to the same extent as you do, so **be clear in your communications to be kind.**)
+- Show appreciation and **provide clear feedback**. (This is especially important because we don’t see each other in person and it can be harder to interpret subtleties. Also remember that not everyone understands a certain language to the same extent as you do, so **be clear in your communication to be kind.**)
 - Take breaks when you feel like you need them.
-- Using welcoming and inclusive language. (Participants are encouraged to display their chosen pronouns on Zoom or in communication on Slack.)
+- Use welcoming and inclusive language. (Participants are encouraged to display their chosen pronouns on Zoom or in communication on Slack)
 
 ## nf-core frowns on 😕
 
-The following behaviours from any participants within the nf-core community (including the organisers) will be considered unacceptable under this code of conduct. Engaging or advocating for any of the following could result in expulsion from nf-core workspaces.
+The following behaviours from any participants within the nf-core community (including the organisers) will be considered unacceptable under this CoC. Engaging or advocating for any of the following could result in expulsion from nf-core workspaces:
 
 - Deliberate intimidation, stalking or following and sustained disruption of communication among participants of the community. This includes hijacking shared screens through actions such as using the annotate tool in conferencing software such as Zoom.
 - “Doxing” i.e. posting (or threatening to post) another person’s personal identifying information online.
 - Spamming or trolling of individuals on social media.
-- Use of sexual or discriminatory imagery, comments, or jokes and unwelcome sexual attention.
+- Use of sexual or discriminatory imagery, comments, jokes, or unwelcome sexual attention.
-- Verbal and text comments that reinforce social structures of domination related to gender, gender identity and expression, sexual orientation, ability, physical appearance, body size, race, age, religion or work experience.
+- Verbal and text comments that reinforce social structures of domination related to gender, gender identity and expression, sexual orientation, ability, physical appearance, body size, race, age, religion, or work experience.
 
 ### Online Trolling
 
-The majority of nf-core interactions and events are held online. Unfortunately, holding events online comes with the added issue of online trolling. This is unacceptable, reports of such behaviour will be taken very seriously, and perpetrators will be excluded from activities immediately.
+The majority of nf-core interactions and events are held online. Unfortunately, holding events online comes with the risk of online trolling. This is unacceptable — reports of such behaviour will be taken very seriously and perpetrators will be excluded from activities immediately.
 
-All community members are required to ask members of the group they are working within for explicit consent prior to taking screenshots of individuals during video calls.
+All community members are **required** to ask members of the group they are working with for explicit consent prior to taking screenshots of individuals during video calls.
 
-## Procedures for Reporting CoC violations
+## Procedures for reporting CoC violations
 
 If someone makes you feel uncomfortable through their behaviours or actions, report it as soon as possible.
 
-You can reach out to members of the [nf-core core team](https://nf-co.re/about) and they will forward your concerns to the safety officer(s).
+You can reach out to members of the Safety Team (Saba Nafees, Cris Tuñí, and Michael Heuer) on Slack. Alternatively, contact a member of the nf-core core team [nf-core core team](https://nf-co.re/about), and they will forward your concerns to the Safety Team.
+
+Issues directly concerning members of the Core Team or the Safety Team will be dealt with by other members of the core team and the safety manager — possible conflicts of interest will be taken into account. nf-core is also in discussions about having an ombudsperson and details will be shared in due course.
+
+All reports will be handled with the utmost discretion and confidentiality.
+
+You can also report any CoC violations to safety [at] nf-co [dot] re. In your email report, please do your best to include:
+
+- Your contact information.
+- Identifying information (e.g. names, nicknames, pseudonyms) of the participant who has violated the Code of Conduct.
+- The behaviour that was in violation and the circumstances surrounding the incident.
+- The approximate time of the behaviour (if different than the time the report was made).
+- Other people involved in the incident, if applicable.
+- If you believe the incident is ongoing.
+- If there is a publicly available record (e.g. mailing list record, a screenshot).
+- Any additional information.
+
+After you file a report, one or more members of our Safety Team will contact you to follow up on your report.
+
+## Who will read and handle reports
+
+All reports will be read and handled by the members of the Safety Team at nf-core.
+
+If members of the Safety Team are deemed to have a conflict of interest with a report, they will be required to recuse themselves as per our Code of Conduct and will not have access to any follow-ups.
+
+To keep this first report confidential from any of the Safety Team members, please submit your first report by direct messaging on Slack/direct email to any of the nf-core members you are comfortable disclosing the information to, and be explicit about which member(s) you do not consent to sharing the information with.
+
+## Reviewing reports
+
+After receiving the report, members of the Safety Team will review the incident report to determine whether immediate action is required, for example, whether there is immediate threat to participants’ safety.
+
+The Safety Team, in consultation with members of the nf-core core team, will assess the information to determine whether the report constitutes a Code of Conduct violation, for them to decide on a course of action.
+
+In the case of insufficient information, one or more members of the Safety Team may contact the reporter, the reportee, or any other attendees to obtain more information.
 
-Issues directly concerning members of the core team will be dealt with by other members of the core team and the safety manager, and possible conflicts of interest will be taken into account. nf-core is also in discussions about having an ombudsperson, and details will be shared in due course.
+Once additional information is gathered, the Safety Team will collectively review and decide on the best course of action to take, if any. The Safety Team reserves the right to not act on a report.
 
-All reports will be handled with utmost discretion and confidentially.
+## Confidentiality
+
+All reports, and any additional information included, are only shared with the team of safety officers (and possibly members of the core team, in case the safety officer is in violation of the CoC). We will respect confidentiality requests for the purpose of protecting victims of abuse.
+
+We will not name harassment victims, beyond discussions between the safety officer and members of the nf-core team, without the explicit consent of the individuals involved.
+
+## Enforcement
+
+Actions taken by the nf-core’s Safety Team may include, but are not limited to:
+
+- Asking anyone to stop a behaviour.
+- Asking anyone to leave the event and online spaces either temporarily, for the remainder of the event, or permanently.
+- Removing access to the gather.town and Slack, either temporarily or permanently.
+- Communicating to all participants to reinforce our expectations for conduct and remind what is unacceptable behaviour; this may be public for practical reasons.
+- Communicating to all participants that an incident has taken place and how we will act or have acted — this may be for the purpose of letting event participants know we are aware of and dealing with the incident.
+- Banning anyone from participating in nf-core-managed spaces, future events, and activities, either temporarily or permanently.
+- No action.
 
 ## Attribution and Acknowledgements
 
@@ -106,6 +161,22 @@ All reports will be handled with utmost discretion and confidentially.
 
 ## Changelog
 
-### v1.0 - March 12th, 2021
+### v1.4 - February 8th, 2022
+
+- Included a new member of the Safety Team. Corrected a typographical error in the text.
+
+### v1.3 - December 10th, 2021
+
+- Added a statement that the CoC applies to nf-core gather.town workspaces. Corrected typographical errors in the text.
+
+### v1.2 - November 12th, 2021
+
+- Removed information specific to reporting CoC violations at the Hackathon in October 2021.
+
+### v1.1 - October 14th, 2021
+
+- Updated with names of new Safety Officers and specific information for the hackathon in October 2021.
+
+### v1.0 - March 15th, 2021
 
 - Complete rewrite from original [Contributor Covenant](http://contributor-covenant.org/) CoC.
diff --git a/README.md b/README.md
index 2cb1013d..94a82262 100644
--- a/README.md
+++ b/README.md
@@ -1,22 +1,23 @@
 # ![nf-core/differentialabundance](docs/images/nf-core-differentialabundance_logo_light.png#gh-light-mode-only) ![nf-core/differentialabundance](docs/images/nf-core-differentialabundance_logo_dark.png#gh-dark-mode-only)
 
-[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/differentialabundance/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.7568000-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.7568000)
+[![GitHub Actions CI Status](https://github.com/nf-core/differentialabundance/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/differentialabundance/actions?query=workflow%3A%22nf-core+CI%22)
+[![GitHub Actions Linting Status](https://github.com/nf-core/differentialabundance/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/differentialabundance/actions?query=workflow%3A%22nf-core+linting%22)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/differentialabundance/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.7568000-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.7568000)
 
-[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A522.10.1-23aa62.svg)](https://www.nextflow.io/)
+[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A523.04.0-23aa62.svg)](https://www.nextflow.io/)
 [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)
 [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)
 [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)
 [![Launch on Nextflow Tower](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Nextflow%20Tower-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/nf-core/differentialabundance)
 
-[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23differentialabundance-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/differentialabundance)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)
+[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23differentialabundance-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/differentialabundance)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)
 
 ## Introduction
 
-**nf-core/differentialabundance** is a bioinformatics pipeline that can be used to analyse data represented as matrices, comparing groups of observations to generate differential statistics and downstream analyses. The initial feature set is built around RNA-seq, but we anticipate rapid expansion to include other platforms.
+**nf-core/differentialabundance** is a bioinformatics pipeline that can be used to analyse data represented as matrices, comparing groups of observations to generate differential statistics and downstream analyses. The pipeline supports RNA-seq data such as that generated by the nf-core [rnaseq workflow](https://github.com/nf-core/rnaseq), and Affymetrix arrays via .CEL files. Other types of matrix may also work with appropriate changes to parameters, and PRs to support additional specific modalities are welcomed.
 
 The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community!
 
-On release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources.The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/differentialabundance/results).
+On release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/differentialabundance/results).
 
 ## Pipeline summary
 
@@ -30,26 +31,13 @@
 6. Optionally build and (if specified) deploy a Shiny app for fully interactive mining of results.
 7. Build an HTML report based on R markdown, with interactive plots (where possible) and tables.
 
-## Quick Start
+## Usage
 
-1. Install [`Nextflow`](https://www.nextflow.io/docs/latest/getstarted.html#installation) (`>=22.10.1`)
-
-2. Install any of [`Docker`](https://docs.docker.com/engine/installation/), [`Singularity`](https://www.sylabs.io/guides/3.0/user-guide/) (you can follow [this tutorial](https://singularity-tutorial.github.io/01-installation/)), [`Podman`](https://podman.io/), [`Shifter`](https://nersc.gitlab.io/development/shifter/how-to-use/) or [`Charliecloud`](https://hpc.github.io/charliecloud/) for full pipeline reproducibility _(you can use [`Conda`](https://conda.io/miniconda.html) both to install Nextflow itself and also to manage software within pipelines. Please only use it within pipelines as a last resort; see [docs](https://nf-co.re/usage/configuration#basic-configuration-profiles))_.
-
-3. Download the pipeline and test it on a minimal dataset with a single command:
-
-   ```bash
-   nextflow run nf-core/differentialabundance -profile test,YOURPROFILE --outdir <OUTDIR>
-   ```
-
-   Note that some form of configuration will be needed so that Nextflow knows how to fetch the required software. This is usually done in the form of a config profile (`YOURPROFILE` in the example command above). You can chain multiple config profiles in a comma-separated string.
-
-   > - The pipeline comes with config profiles called `docker`, `singularity`, `podman`, `shifter`, `charliecloud` and `conda` which instruct the pipeline to use the named tool for software management. For example, `-profile test,docker`.
-   > - Please check [nf-core/configs](https://github.com/nf-core/configs#documentation) to see if a custom config file to run nf-core pipelines already exists for your Institute. If so, you can simply use `-profile <institute>` in your command. This will enable either `docker` or `singularity` and set the appropriate execution settings for your local compute environment.
-   > - If you are using `singularity`, please use the [`nf-core download`](https://nf-co.re/tools/#downloading-pipelines-for-offline-use) command to download images first, before running the pipeline. Setting the [`NXF_SINGULARITY_CACHEDIR` or `singularity.cacheDir`](https://www.nextflow.io/docs/latest/singularity.html?#singularity-docker-hub) Nextflow options enables you to store and re-use the images from a central location for future pipeline runs.
-   > - If you are using `conda`, it is highly recommended to use the [`NXF_CONDA_CACHEDIR` or `conda.cacheDir`](https://www.nextflow.io/docs/latest/conda.html) settings to store the environments in a central location for future pipeline runs.
-
-4. Start running your own analysis!
+:::note
+If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how
+to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline)
+with `-profile test` before running the workflow on actual data.
+:::
 
 RNA-seq:
 
@@ -74,15 +62,25 @@ Affymetrix microarray:
     -profile affy,<docker/singularity/.../institute>
 ```
 
+:::warning
+Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those
+provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_;
+see [docs](https://nf-co.re/usage/configuration#custom-configuration-files).
+:::
+
+For more details and further functionality, please refer to the [usage documentation](https://nf-co.re/differentialabundance/usage) and the [parameter documentation](https://nf-co.re/differentialabundance/parameters).
+
 ### Reporting
 
 The pipeline reports its outcomes in two forms.
 
-#### Markdown-derived HTML report
+#### R markdown and HTML
+
+The primary workflow output is an HTML-format report produced from an [R markdown template](assets/differentialabundance_report.Rmd) (you can also supply your own). This leverages helper functions from [shinyngs](https://github.com/pinin4fjords/shinyngs) to produce rich plots and tables, but does not provide significant interactivity.
 
 ![screenshot of the markdown report](docs/images/markdown_report.png "Markdown report")
 
-The primary workflow output is an HTML-format report produced from an [R markdown template](assets/differentialabundance_report.Rmd). This leverages helper functions from [shinyngs](https://github.com/pinin4fjords/shinyngs) to produce rich plots and tables, but does not provide significant interactivity.
+Additionally, a zip file is produced by the pipeline, containing an R markdown file and all necessary file inputs for reporting. The markdown file is the same as the input template, but with the parameters set appropriately, so that you can run the reporting yourself in RStudio, and add any customisations you need.
 
 #### Shiny-based data mining app
 
@@ -94,9 +92,11 @@ A second optional output is produced by leveraging [shinyngs](https://github.com
 
 By default the application is provided as an R script and associated serialised data structure, which you can use to quickly start the application locally. With proper configuration the app can also be deployed to [shinyapps.io](https://www.shinyapps.io/) - though this requires you to have an account on that service (free tier available).
 
-## Documentation
+## Pipeline output
 
-The nf-core/differentialabundance pipeline comes with documentation about the pipeline [usage](https://nf-co.re/differentialabundance/usage), [parameters](https://nf-co.re/differentialabundance/parameters) and [output](https://nf-co.re/differentialabundance/output).
+To see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/differentialabundance/results) tab on the nf-core website pipeline page.
+For more details about the output files and reports, please refer to the
+[output documentation](https://nf-co.re/differentialabundance/output).
 
 ## Credits
diff --git a/assets/differentialabundance_report.Rmd b/assets/differentialabundance_report.Rmd
index 0da0b5a6..d6b654b8 100644
--- a/assets/differentialabundance_report.Rmd
+++ b/assets/differentialabundance_report.Rmd
@@ -33,11 +33,20 @@ params:
   features_id_col: NULL
   features_name_col: NULL
   features_metadata_cols: NULL
+  features_gtf_feature_type: NULL
+  features_gtf_table_first_field: NULL
+  features_log2_assays: NULL
   raw_matrix: null # e.g. 0_salmon.merged.gene_counts.tsv
   normalised_matrix: null
   variance_stabilised_matrix: null # e.g. test_files/3_treatment-WT-P23H.vst.tsv
   contrasts_file: null # e.g. GSE156533.contrasts.csv
   differential_table: file.csv
+  proteus_measurecol_prefix: NULL
+  proteus_norm_function: NULL
+  proteus_plotsd_method: NULL
+  proteus_plotmv_loess: NULL
+  proteus_palette_name: NULL
+  proteus_round_digits: NULL
   affy_cel_files_archive: NULL
   affy_file_name_col: NULL
   affy_background: NULL
@@ -208,7 +217,9 @@ if (! params$observations_name_col %in% colnames(observations)){
 
 if (! is.null(params$features)){
   features <- read_metadata(file.path(params$input_dir, params$features))
-  features <- features[,colnames(features) %in% simpleSplit(params$features_metadata_cols), drop = FALSE]
+  if (! is.null(params$features_metadata_cols)){
+    features <- features[,colnames(features) %in% simpleSplit(params$features_metadata_cols), drop = FALSE]
+  }
 }
 
 contrasts <- read_metadata(file.path(params$input_dir, params$contrasts_file))
@@ -231,26 +242,27 @@ names(assay_names) = assay_names
 assay_files <- lapply(assay_names, function(x) params[[paste0(x, '_matrix')]])
 
 assay_data <- lapply(assay_files, function(x) {
-  mat <- read_matrix(
-    x,
-    sample_metadata = observations,
-    row.names = 1
+  mat <- na.omit(
+    read_matrix(
+      x,
+      sample_metadata = observations,
+      row.names = 1
+    )
   )
   colnames(mat) <- observations[[params$observations_name_col]][match(colnames(mat), rownames(observations))]
-
-  # Bit hacky, but ensure log
-  if (max(mat) > 20){
-    log2(mat+1)
-  }else{
-    mat
-  }
+  mat
 })
 
+if (!is.null(params$features_log2_assays)) {
+  # Remove brackets from assay list. TODO: Remove if this is added to cond_log2_transform_assays
+  features_log2_assays <- gsub('\\]$', '', gsub('^\\[', '', params$features_log2_assays))
+  assay_data <- cond_log2_transform_assays(assay_data, features_log2_assays)
+}
+
 # Now we can rename the observations rows using the title field
 rownames(observations) <- observations[[params$observations_name_col]]
 
 # Run PCA early so we can understand how important each variable is
-
 pca_datas <- lapply(names(assay_data), function(assay_type){
   compilePCAData(assay_data[[assay_type]])
 })
@@ -293,10 +305,7 @@ treatment-mCherry-hND6-batcheffect.deseq2.results.tsv
 
 ```{r, echo=FALSE}
-prefix_part_names <- c('variable', 'reference', 'target', 'blocking')
-diff_prefixes <- sub('-$', '', apply(contrasts[,prefix_part_names], 1, function(x) paste(x, collapse = '-')))
-
-differential_files <- lapply(diff_prefixes, function(d){
+differential_files <- lapply(contrasts$id, function(d){
   file.path(params$input_dir, paste0(gsub(' |;', '_', d), params$differential_file_suffix))
 })
@@ -320,22 +329,36 @@ differential_results <- lapply(differential_files, function(diff_file){
   }
 
   # Annotate differential tables if possible
-
   if (! is.null(params$features)){
     diff <- merge(features, diff, by.x = params$features_id_col, by.y = params$differential_feature_id_column)
   }
   diff
 })
-names(differential_results) <- diff_prefixes
+names(differential_results) <- contrasts$id
 ```
 
 ```{r, echo=FALSE}
-contrast_descriptions <- paste(contrasts$target, 'versus', contrasts$reference, 'in', contrasts$variable)
-with_blocking <- which(contrasts$blocking != '')
-contrast_descriptions[with_blocking] <- paste0(contrast_descriptions[with_blocking], " (blocking on ", contrasts$blocking[with_blocking],")")
+# Function to make friendly contrast name from contrast components, including optional bits
+
+name_contrast <- function(i){
+  contrast_name <- paste(contrasts$target[i], 'versus', contrasts$reference[i], 'in', contrasts$variable[i])
+  contrast_vals <- contrasts[i,]
+  populated <- colnames(contrasts)[! (is.na(contrast_vals) | contrast_vals == '' | is.null(contrast_vals))]
+  optional <- setdiff(populated, c('id', 'target', 'reference', 'variable'))
+
+  if (length(optional) > 0){
+    optional_part <- paste0('(', paste(paste(optional, contrasts[i,optional], sep=': '), collapse=', '), ')')
+  }else{
+    optional_part <- ''
+  }
+
+  paste(contrast_name, optional_part)
+}
+
+contrast_descriptions <- unlist(lapply(1:nrow(contrasts), function(x) name_contrast(x)))
 
 # Check both adjusted and unadjusted p values
@@ -409,7 +432,7 @@ print( htmltools::tagList(datatable(observations_to_print, caption = paste(ucfir
 
 ## Contrasts
 
-Comparisons were made between `r params$observations_type` groups defined using using `r params$observation_type` metadata columns, as described in the following table of contrasts:
+Comparisons were made between `r params$observations_type` groups defined using `r params$observation_type` metadata columns, as described in the following table of contrasts:
 
 ```{r, echo=FALSE, results='asis'}
 contrasts_to_print <- contrasts
@@ -432,6 +455,7 @@ The following plots show the abundance value distributions of input matrices. A
 
 ```{r, include=FALSE}
 ```
+
 #### Box plots
 
 ```{r, echo=FALSE, results='asis', fig.height=8}
@@ -577,7 +601,6 @@ for (assay_type in rev(names(assay_data))){
     variable_genes <- selectVariableGenes(matrix = assay_data[[assay_type]], ntop = params$exploratory_n_features)
 
     dendroColorScale <- makeColorScale(length(unique(observations[[iv]])), palette = params$exploratory_palette_name)
-
     p <- clusteringDendrogram(
       2^assay_data[[assay_type]][variable_genes, ],
       observations[, iv, drop = FALSE],
@@ -601,16 +624,17 @@ for (assay_type in rev(names(assay_data))){
 }
 ```
 
-### Outlier detection {.tabset}
-
-Outlier detection based on [median absolute deviation](https://wiki.arrayserver.com/wiki/index.php?title=CorrelationQC.pdf) was undertaken, the outlier scoring is plotted below.
-
 ```{r, echo=FALSE, results='asis', warning=FALSE}
 
 # We can't look for ouliers in sets of less than 3 samples, so exclude variables
 # unless the minimum group size is larger than that
 
 iv_min_group_sizes <- unlist(lapply(informative_variables, function(x) min(table(observations[[x]]))))
 
+if (any(iv_min_group_sizes > 2)){
+  cat("\n### Outlier detection {.tabset}\n")
+  cat("\nOutlier detection based on [median absolute deviation](https://wiki.arrayserver.com/wiki/index.php?title=CorrelationQC.pdf) was undertaken, the outlier scoring is plotted below.\n")
+}
+
 foo <- lapply(informative_variables[iv_min_group_sizes > 2], function(iv){
 
   cat(paste("\n####", iv, "\n"))
@@ -622,27 +646,29 @@ foo <- lapply(informative_variables[iv_min_group_sizes > 2], function(iv){
     groupby = iv
   )
 
-  mad_plot_args <- list(
-    x = plotdata$group,
-    y = plotdata$mad,
-    color = plotdata$outlier,
-    hline_thresholds = c("Outlier threshold" = params$exploratory_mad_threshold),
-    palette = makeColorScale(2, palette = params$differential_palette_name),
-    legend_title = "Outlier status",
-    labels = rownames(plotdata),
-    show_labels = TRUE,
-    xlab = "Sample group",
-    ylab = "MAD score"
-  )
+  if (! is.null(plotdata)){
+    mad_plot_args <- list(
+      x = plotdata$group,
+      y = plotdata$mad,
+      color = plotdata$outlier,
+      hline_thresholds = c("Outlier threshold" = params$exploratory_mad_threshold),
+      palette = makeColorScale(2, palette = params$differential_palette_name),
+      legend_title = "Outlier status",
+      labels = rownames(plotdata),
+      show_labels = TRUE,
+      xlab = "Sample group",
+      ylab = "MAD score"
+    )
 
-  print(htmltools::tagList(do.call("plotly_scatterplot", mad_plot_args)))
+    print(htmltools::tagList(do.call("plotly_scatterplot", mad_plot_args)))
 
-  outliers <- rownames(plotdata)[plotdata$outlier]
+    outliers <- rownames(plotdata)[plotdata$outlier]
 
-  if (length(outliers) == 0){
-    cat(paste0("No outlying samples were detected in groups defined by ", iv,".\n"))
-  }else{
-    cat(paste0(length(outliers), ' possible outliers were detected in groups defined by ', iv ,': ', paste(outliers, collapse=', '), "\n"))
+    if (length(outliers) == 0){
+      cat(paste0("No outlying samples were detected in groups defined by ", iv,".\n"))
+    }else{
+      cat(paste0(length(outliers), ' possible outliers were detected in groups defined by ', iv ,': ', paste(outliers, collapse=', '), "\n"))
+    }
   }
 })
@@ -793,6 +819,11 @@ if (any(unlist(params[paste0(possible_gene_set_methods, '_run')]))){
 
 # Methods
 
+```{r, echo=FALSE, results='asis', eval=params$study_type == 'maxquant'}
+cat(paste0("\n## Protein abundance import\n"))
+make_params_table('importing maxquant output', 'proteus_', remove_pattern = TRUE)
+```
+
 ## Filtering
 
 ```{r, echo=FALSE, results='asis'}
diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml
index 5f0030c9..198410ef 100644
--- a/assets/methods_description_template.yml
+++ b/assets/methods_description_template.yml
@@ -6,13 +6,17 @@ plot_type: "html"
 ## You inject any metadata in the Nextflow '${workflow}' object
 data: |

   <h4>Methods</h4>
-  <p>Data was processed using nf-core/differentialabundance v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020).</p>
+  <p>Data was processed using nf-core/differentialabundance v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020), utilising reproducible software environments from the Bioconda (Grüning et al., 2018) and Biocontainers (da Veiga Leprevost et al., 2017) projects.</p>
   <p>The pipeline was executed with Nextflow v${workflow.nextflow.version} (Di Tommaso et al., 2017) with the following command:</p>
   <pre><code>${workflow.commandLine}</code></pre>
+  <p>${tool_citations}</p>
   <h4>References</h4>
   <ul>
-    <li>Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature Biotechnology, 35(4), 316-319. https://doi.org/10.1038/nbt.3820</li>
-    <li>Ewels, P. A., Peltzer, A., Fillinger, S., Patel, H., Alneberg, J., Wilm, A., Garcia, M. U., Di Tommaso, P., & Nahnsen, S. (2020). The nf-core framework for community-curated bioinformatics pipelines. Nature Biotechnology, 38(3), 276-278. https://doi.org/10.1038/s41587-020-0439-x</li>
+    <li>Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature Biotechnology, 35(4), 316-319. doi: 10.1038/nbt.3820</li>
+    <li>Ewels, P. A., Peltzer, A., Fillinger, S., Patel, H., Alneberg, J., Wilm, A., Garcia, M. U., Di Tommaso, P., & Nahnsen, S. (2020). The nf-core framework for community-curated bioinformatics pipelines. Nature Biotechnology, 38(3), 276-278. doi: 10.1038/s41587-020-0439-x</li>
+    <li>Grüning, B., Dale, R., Sjödin, A., Chapman, B. A., Rowe, J., Tomkins-Tinch, C. H., Valieris, R., Köster, J., & Bioconda Team. (2018). Bioconda: sustainable and comprehensive software distribution for the life sciences. Nature Methods, 15(7), 475–476. doi: 10.1038/s41592-018-0046-7</li>
+    <li>da Veiga Leprevost, F., Grüning, B. A., Alves Aflitos, S., Röst, H. L., Uszkoreit, J., Barsnes, H., Vaudel, M., Moreno, P., Gatto, L., Weber, J., Bai, M., Jimenez, R. C., Sachsenberg, T., Pfeuffer, J., Vera Alvarez, R., Griss, J., Nesvizhskii, A. I., & Perez-Riverol, Y. (2017). BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics (Oxford, England), 33(16), 2580–2582. doi: 10.1093/bioinformatics/btx192</li>
+    ${tool_bibliography}
   </ul>
   <h5>Notes:</h5>
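This methods block is rendered into the MultiQC report, and the nf-core template also lets a customised version be swapped in at run time. A minimal sketch, assuming the template's standard `--multiqc_methods_description` parameter is exposed by this pipeline (the YAML file name is a placeholder):

```bash
# Hypothetical invocation supplying a custom methods-description YAML
# (my_methods_description.yml is a placeholder for your own file)
nextflow run nf-core/differentialabundance \
    -profile test,docker \
    --outdir results \
    --multiqc_methods_description my_methods_description.yml
```
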
diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index b44b5f2d..15a57fd3 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,7 +1,7 @@ report_comment: > - This report has been generated by the nf-core/differentialabundance + This report has been generated by the nf-core/differentialabundance analysis pipeline. For information about how to interpret these results, please see the - documentation. + documentation. report_section_order: "nf-core-differentialabundance-methods-description": order: -1000 diff --git a/assets/nf-core-differentialabundance_logo_light.png b/assets/nf-core-differentialabundance_logo_light.png index 2a952115..5ab625ef 100644 Binary files a/assets/nf-core-differentialabundance_logo_light.png and b/assets/nf-core-differentialabundance_logo_light.png differ diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv deleted file mode 100644 index 5f653ab7..00000000 --- a/assets/samplesheet.csv +++ /dev/null @@ -1,3 +0,0 @@ -sample,fastq_1,fastq_2 -SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz -SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz, diff --git a/assets/schema_input.json b/assets/schema_input.json index f30e31fa..6a90aaeb 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -11,26 +11,8 @@ "type": "string", "pattern": "^\\S+$", "errorMessage": "Sample name must be provided and cannot contain spaces" - }, - "fastq_1": { - "type": "string", - "pattern": "^\\S+\\.f(ast)?q\\.gz$", - "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" - }, - "fastq_2": { - "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'", - "anyOf": [ - { - "type": "string", - "pattern": "^\\S+\\.f(ast)?q\\.gz$" - }, - { - "type": "string", - "maxLength": 0 - } - ] } }, - "required": ["sample", "fastq_1"] + "required": [] } } diff --git a/assets/slackreport.json b/assets/slackreport.json index 043d02f2..1e44b419 100644 --- a/assets/slackreport.json +++ b/assets/slackreport.json @@ -3,7 +3,7 @@ { "fallback": "Plain-text summary of the attachment.", "color": "<% if (success) { %>good<% } else { %>danger<%} %>", - "author_name": "sanger-tol/readmapping v${version} - ${runName}", + "author_name": "nf-core/differentialabundance v${version} - ${runName}", "author_icon": "https://www.nextflow.io/docs/latest/_static/favicon.ico", "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors<% } %>", "fields": [ diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index 11b15572..4a758fe0 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -158,9 +158,6 @@ def sniff_format(handle): peek = read_head(handle) handle.seek(0) sniffer = csv.Sniffer() - if not sniffer.has_header(peek): - logger.critical("The given sample sheet does not appear to contain a header.") - sys.exit(1) dialect = sniffer.sniff(peek) return dialect diff --git a/conf/base.config b/conf/base.config index 3821c03a..cbc9311c 100644 --- a/conf/base.config +++ b/conf/base.config @@ -14,7 +14,7 @@ process { memory = { check_max( 6.GB * task.attempt, 'memory' ) } time = { check_max( 4.h * task.attempt, 'time' ) } - errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'finish' } + errorStrategy = { task.exitStatus in ((130..145) + 104) ? 
'retry' : 'finish' } maxRetries = 1 maxErrors = '-1' diff --git a/conf/igenomes.config b/conf/igenomes.config index 7a1b3ac6..3f114377 100644 --- a/conf/igenomes.config +++ b/conf/igenomes.config @@ -36,6 +36,14 @@ params { macs_gsize = "2.7e9" blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed" } + 'CHM13' { + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAIndex/" + bwamem2 = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAmem2Index/" + gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/CHM13/Annotation/Genes/genes.gtf" + gff = "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/009/914/755/GCF_009914755.1_T2T-CHM13v2.0/GCF_009914755.1_T2T-CHM13v2.0_genomic.gff.gz" + mito_name = "chrM" + } 'GRCm38' { fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" bwa = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BWAIndex/version0.6.0/" diff --git a/conf/maxquant.config b/conf/maxquant.config new file mode 100644 index 00000000..c2f081ca --- /dev/null +++ b/conf/maxquant.config @@ -0,0 +1,45 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running MaxQuant proteomics analysis +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines settings specific to MaxQuant proteomics analysis + + Use as follows: + nextflow run nf-core/differentialabundance -profile maxquant, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + + config_profile_name = 'MaxQuant profile' + config_profile_description = 'Settings for MaxQuant analysis' + + // Study + study_type = 'maxquant' + study_abundance_type = 'intensities' + + // Features + features_id_col = 'Majority protein IDs' + features_name_col = 'Majority protein IDs' + features_metadata_cols = 'Majority protein IDs' + features_type = 'protein' + + // Exploratory + exploratory_assay_names = "raw,normalised" + exploratory_final_assay = "normalised" + + // Differential options + differential_file_suffix = ".limma.results.tsv" + differential_fc_column = "logFC" + differential_pval_column = "P.Value" + differential_qval_column = "adj.P.Val" + differential_feature_id_column = "probe_id" + differential_feature_name_column = "Majority protein IDs" + + // Proteus options + proteus_measurecol_prefix = 'LFQ intensity ' + + // Shiny does not work for this datatype + shinyngs_build_app = false +} diff --git a/conf/modules.config b/conf/modules.config index 9f116820..8b3b66e7 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -32,7 +32,7 @@ process { pattern: '*.anno.tsv' ] ] - ext.args = "--feature-type transcript" + ext.args = "--feature-type '${params.features_gtf_feature_type}' --first-field '${params.features_gtf_table_first_field}'" } withName: VALIDATOR { @@ -100,18 +100,107 @@ process { ].join(' ').trim() } } - withName: DESEQ2_DIFFERENTIAL { + withName: PROTEUS { publishDir = [ [ - path: { "${params.outdir}/tables/differential" }, + path: { "${params.outdir}/tables/proteus/${meta.id}/" }, mode: params.publish_dir_mode, - pattern: '*.deseq2.results.tsv' + pattern: '*.tsv' + ], + [ + path: { "${params.outdir}/plots/proteus/${meta.id}/" }, + mode: params.publish_dir_mode, + pattern: '*.png' + ], + [ + path: { "${params.outdir}/other/proteus/${meta.id}/" }, + mode: params.publish_dir_mode, + 
pattern: '*.rds' + + ], + [ + path: { "${params.outdir}/other/proteus/" }, + mode: params.publish_dir_mode, + pattern: '*sessionInfo.log' + ] + ] + ext.args = { [ + "--contrast_variable \"${meta.id}\"", + "--sample_id_col \"${params.observations_id_col}\"", + "--protein_id_col \"${params.features_id_col}\"", + "--measure_col_prefix \"${params.proteus_measurecol_prefix}\"", + "--norm_function $params.proteus_norm_function", + "--plotsd_method $params.proteus_plotsd_method", + "--plotmv_loess $params.proteus_plotmv_loess", + "--palette_name $params.proteus_palette_name", + "--round_digits $params.proteus_round_digits" + ].join(' ').trim() } + } + + withName: GEOQUERY_GETGEO { + publishDir = [ + [ + path: { "${params.outdir}/tables/processed_abundance" }, + mode: params.publish_dir_mode, + pattern: '*.matrix.tsv' + ], + [ + path: { "${params.outdir}/tables/annotation" }, + mode: params.publish_dir_mode, + pattern: '*.annotation.tsv' ], + [ + path: { "${params.outdir}/other/affy" }, + mode: params.publish_dir_mode, + pattern: '*.{rds,sessionInfo.log}' + ] + ] + ext.prefix = { "normalised." } + ext.args = { + ((params.features_metadata_cols == null) ? '' : "--metacols \"${params.features_metadata_cols}\"") + } + } + + withName: DESEQ2_NORM { + ext.prefix = 'all' + publishDir = [ [ path: { "${params.outdir}/tables/processed_abundance" }, mode: params.publish_dir_mode, pattern: '*.{normalised_counts,vst,rlog}.tsv' ], + ] + ext.args = { [ + "--sample_id_col \"${params.observations_id_col}\"", + "--test $params.deseq2_test", + "--fit_type $params.deseq2_fit_type", + "--sf_type $params.deseq2_sf_type", + "--min_replicates_for_replace $params.deseq2_min_replicates_for_replace", + "--use_t $params.deseq2_use_t", + "--lfc_threshold $params.deseq2_lfc_threshold", + "--alt_hypothesis $params.deseq2_alt_hypothesis", + "--independent_filtering $params.deseq2_independent_filtering", + "--p_adjust_method $params.deseq2_p_adjust_method", + "--alpha $params.deseq2_alpha", + "--minmu $params.deseq2_minmu", + "--vs_method $params.deseq2_vs_method", + "--vs_blind $params.deseq2_vs_blind", + "--vst_nsub $params.deseq2_vst_nsub", + "--shrink_lfc $params.deseq2_shrink_lfc", + "--cores $params.deseq2_cores", + "--subset_to_contrast_samples $params.differential_subset_to_contrast_samples", + ((meta.blocking == null) ? 
'' : "--blocking_variables $meta.blocking"), + ].join(' ').trim() } + } + + withName: DESEQ2_DIFFERENTIAL { + ext.prefix = { "${meta.id}" } + publishDir = [ + [ + path: { "${params.outdir}/tables/differential" }, + mode: params.publish_dir_mode, + pattern: '*.deseq2.results.tsv' + ], [ path: { "${params.outdir}/plots/qc" }, mode: params.publish_dir_mode, @@ -124,9 +213,11 @@ process { ] ] ext.args = { [ + "--gene_id_col \"${params.features_id_col}\"", "--sample_id_col \"${params.observations_id_col}\"", "--test $params.deseq2_test", "--fit_type $params.deseq2_fit_type", + "--sizefactors_from_controls $params.sizefactors_from_controls", "--sf_type $params.deseq2_sf_type", "--min_replicates_for_replace $params.deseq2_min_replicates_for_replace", "--use_t $params.deseq2_use_t", @@ -149,6 +240,7 @@ process { } withName: LIMMA_DIFFERENTIAL { + ext.prefix = { "${meta.id}" } publishDir = [ [ path: { "${params.outdir}/tables/differential" }, @@ -236,7 +328,8 @@ process { "--assay_names \"${params.exploratory_assay_names}\"", "--final_assay \"${params.exploratory_final_assay}\"", "--outlier_mad_threshold ${params.exploratory_mad_threshold}", - "--palette_name \"${params.exploratory_palette_name}\"" + "--palette_name \"${params.exploratory_palette_name}\"", + ( (params.study_type == 'maxquant') ? "--log2_assays ''" : (((params.features_log2_assays == null) ? '' : "--log2_assays \"$params.features_log2_assays\"".replace('[', '').replace(']', ''))) ) ].join(' ').trim() } } @@ -272,6 +365,7 @@ process { "--assay_names \"${params.exploratory_assay_names}\"", "--sample_id_col \"${params.observations_id_col}\"", "--feature_id_col \"${params.features_id_col}\"", + "--feature_name_col \"${params.features_name_col}\"", "--diff_feature_id_col \"${params.differential_feature_id_column}\"", "--fold_change_column \"${params.differential_fc_column}\"", "--pval_column \"${params.differential_pval_column}\"", @@ -296,8 +390,8 @@ process { } withName: RMARKDOWNNOTEBOOK { - conda = "bioconda::r-shinyngs=1.7.1" - container = { "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/r-shinyngs:1.7.1--r42hdfd78af_1':'quay.io/biocontainers/r-shinyngs:1.7.1--r42hdfd78af_1' }" } + conda = "bioconda::r-shinyngs=1.8.4" + container = { "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/r-shinyngs:1.8.4--r43hdfd78af_0' : 'biocontainers/r-shinyngs:1.8.4--r43hdfd78af_0' }" } publishDir = [ path: { "${params.outdir}/report" }, mode: params.publish_dir_mode, @@ -305,6 +399,14 @@ process { ] } + withName: MAKE_REPORT_BUNDLE { + publishDir = [ + path: { "${params.outdir}/report" }, + mode: params.publish_dir_mode, + pattern: '*.zip' + ] + } + withName: CUSTOM_MATRIXFILTER { publishDir = [ enabled: false diff --git a/conf/soft.config b/conf/soft.config new file mode 100644 index 00000000..31d09ec2 --- /dev/null +++ b/conf/soft.config @@ -0,0 +1,46 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running SOFT array file analysis +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines settings specific to array analysis with SOFT files from GEO + + Use as follows: + nextflow run nf-core/differentialabundance -profile soft, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + + config_profile_name = 'SOFT matrix track test profile' + config_profile_description = 'Minimal settings for test of the SOFT matrix track' + + // Study + study_type = 'geo_soft_file' + study_abundance_type = 'intensities' + + // Observations + observations_id_col = 'id' + observations_name_col = 'id' + + + // Features + features_id_col = 'ID' + features_metadata_cols = 'ID,ENTREZ_GENE_ID,Gene Symbol,Sequence Type' + features_name_col = 'Gene Symbol' + + + // Exploratory + exploratory_assay_names = 'normalised' + exploratory_final_assay = 'normalised' + + // Differential options + differential_file_suffix = ".limma.results.tsv" + differential_fc_column = "logFC" + differential_pval_column = "P.Value" + differential_qval_column = "adj.P.Val" + differential_feature_id_column = "probe_id" + differential_feature_name_column = "Symbol" + +} + diff --git a/conf/test_maxquant.config b/conf/test_maxquant.config new file mode 100644 index 00000000..89d31cca --- /dev/null +++ b/conf/test_maxquant.config @@ -0,0 +1,37 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple + pipeline test with MaxQuant Mass-spec data. 
+
+    Use as follows:
+        nextflow run nf-core/differentialabundance -profile test_maxquant,<docker/singularity> --outdir <OUTDIR>
+
+----------------------------------------------------------------------------------------
+*/
+
+includeConfig 'maxquant.config'
+
+params {
+    study_name = 'PXD043349'
+    config_profile_name = 'MaxQuant test profile'
+    config_profile_description = 'MaxQuant test dataset to check pipeline function'
+
+    // Limit resources so that this can run on GitHub Actions
+    max_cpus = 2
+    max_memory = '6.GB'
+    max_time = '6.h'
+
+    // Input data
+    input = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/proteomics/maxquant/MaxQuant_samplesheet.tsv'
+    matrix = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/proteomics/maxquant/MaxQuant_proteinGroups.txt'
+    contrasts = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/proteomics/maxquant/MaxQuant_contrasts.csv'
+
+    // Observations
+    observations_id_col = 'Experiment'
+    observations_name_col = 'Name'
+
+    // Exploratory
+    exploratory_main_variable = 'Celltype'
+}
diff --git a/conf/test_soft.config b/conf/test_soft.config
new file mode 100644
index 00000000..1fc21677
--- /dev/null
+++ b/conf/test_soft.config
@@ -0,0 +1,32 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Nextflow config file for running minimal tests
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Defines input files and everything required to run a fast and simple
+    pipeline test with SOFT array files from GEO.
+
+    Use as follows:
+        nextflow run nf-core/differentialabundance -profile test_soft,<docker/singularity> --outdir <OUTDIR>
+
+----------------------------------------------------------------------------------------
+*/
+
+includeConfig 'soft.config'
+
+params {
+
+    config_profile_name = 'SOFT matrix track test profile'
+    config_profile_description = 'Minimal settings for test of the SOFT matrix track'
+
+    // Limit resources so that this can run on GitHub Actions
+    max_cpus = 2
+    max_memory = '6.GB'
+    max_time = '6.h'
+
+    // Input
+    input = 'https://raw.githubusercontent.com/nf-core/test-datasets/differentialabundance/testdata/GSE50790.csv'
+    contrasts = 'https://raw.githubusercontent.com/nf-core/test-datasets/differentialabundance/testdata/GSE50790_contrasts.csv'
+    querygse = 'GSE50790'
+
+}
+
diff --git a/docs/images/workflow.png b/docs/images/workflow.png
index 6fe5e03e..5b73b936 100644
Binary files a/docs/images/workflow.png and b/docs/images/workflow.png differ
diff --git a/docs/images/workflow.svg b/docs/images/workflow.svg
index 30f4a8fe..21313a3c 100644
--- a/docs/images/workflow.svg
+++ b/docs/images/workflow.svg
[SVG markup diff omitted: the workflow diagram is redrawn to add MaxQuant proteomics and GEO SOFT file entry tracks alongside the existing RNA-seq and Affymetrix microarray tracks, covering the getGEO, justRMA, readProteinGroups, GTF to table, Validate, Filter matrix, DESeq2, Limma, GSEA, Plot exploratory, Plot differential, R Markdown notebook and Build Shiny app processes, with a legend distinguishing common paths, entry points, processes and major outputs.]
diff --git a/docs/output.md b/docs/output.md
index 4bf07ea8..26c247ec 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -13,6 +13,7 @@ This directory contains the main reporting output of the workflow.
 - `report/`
   - `*.html`: an HTML report file named according to the value of `params.study_name`, containing graphical and tabular summary results for the workflow run.
+  - `*.zip`: a zip file containing an R markdown file with parameters set and all necessary input files to open and customise the reporting.
@@ -37,6 +38,11 @@ Stand-alone graphical outputs are placed in this directory. They may be useful i
   - `[contrast]/png/volcano.png`: Volcano plots of -log(10) p value against log(2) fold changes
 - `gsea/`: Directory containing graphical outputs from GSEA (where enabled). Plots are stored in directories named for the associated contrast.
   - `[contrast]/png/[gsea_plot_type].png`
+- `proteus/`: If `--study_type maxquant`: Directory containing plots produced by the proteus module, which is used for processing MaxQuant input. Files are prefixed with the associated contrast and chosen normalization function (if any).
+  - `[contrast]/[norm_function].normalized_dendrogram.png`: A sample clustering dendrogram after normalization.
+  - `[contrast]/[norm_function].normalized_mean_variance_relationship.png`: Plots of log intensity vs mean log intensity after normalization of each contrast level.
+  - `[contrast]/[norm_function].normalized_distributions.png`: A plot of sample distributions after normalization.
+  - `[contrast]/raw_distributions.png`: A plot of sample distributions without normalization.
@@ -61,6 +67,9 @@ Most plots are included in the HTML report (see above), but are also included in
   - `OR [contrast_name].limma.results.tsv`: Results of Limma differential analysis (Affymetrix arrays)
 - `gsea/`: Directory containing tables of differential gene set analysis from GSEA (where enabled)
   - `[contrast]/[contrast].gsea_report_for_[condition].tsv`: A GSEA report table for each side of each contrast
+- `proteus/`: If `--study_type maxquant`: Directory containing abundance values produced by the proteus module, which is used for processing MaxQuant input. Files are prefixed with the associated contrast and chosen normalization function (if any).
+  - `[contrast]/[norm_function].normalized_proteingroups_tab.tsv`: Abundance table after normalization.
+  - `[contrast]/raw_proteingroups_tab.tsv`: Abundance table without normalization.
@@ -68,15 +77,17 @@ The `differential` folder is likely to be the core result set for most users, co
 ## Shiny app
 
+<details markdown="1">
+<summary>Output files</summary>
+
 - `shinyngs_app/`
   - `[study name]`:
     - `data.rds`: serialized R object which can be used to generate a Shiny application
     - `app.R`: minimal R script that will source the data object and generate the app
 
-The app must be run in an environment with [ShinyNGS](https://github.com/pinin4fjords/shinyngs) installed, or you can see the workflow parameters to deploy to shinyapps.io (see usage documentation).
+</details>
 
-<details markdown="1">
-<summary>Output files</summary>
+The app must be run in an environment with [ShinyNGS](https://github.com/pinin4fjords/shinyngs) installed, or you can see the workflow parameters to deploy to shinyapps.io (see usage documentation).
 
 ### Pipeline information
 
@@ -87,7 +98,60 @@ The app must be run in an environment with [ShinyNGS](https://github.com/pinin4f
 - `pipeline_info/`
   - Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`.
   - Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.yml`. The `pipeline_report*` files will only be present if the `--email` / `--email_on_fail` parameters are used when running the pipeline.
   - Reformatted samplesheet files used as input to the pipeline: `samplesheet.valid.csv`.
+  - Parameters used by the pipeline run: `params.json`.
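The `*.zip` report bundle listed above can be unpacked straight away for customisation; a minimal sketch, assuming default output naming (the archive name follows `params.study_name`, and `results` stands in for your `--outdir`):

```bash
# Unpack the R markdown report bundle for local editing
unzip results/report/*.zip -d report_bundle
ls report_bundle    # expect the .Rmd file plus the input tables it references
```
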
[Nextflow](https://www.nextflow.io/docs/latest/tracing.html) provides excellent functionality for generating various reports relevant to the running and execution of the pipeline. This will allow you to troubleshoot errors with the running of the pipeline, and also provide you with other information such as launch commands, run times and resource usage. + +## Frequently asked questions + +### Why are no genes flagged as differentially expressed? + +#### 1. Low replication: + +**Problem:** The number of replicates in your RNA-seq experiment may be insufficient to detect statistically significant differential expression. + +**Suggested course of action:** Consider increasing the number of replicates to improve the statistical power of your analysis. Repeating the experiment with greater replication allows for better estimation of biological variation and increases the chances of observing significant differential expression. Consult with experimental design experts or statisticians to determine the appropriate sample size calculation based on your specific research question and resources. + +#### 2. Subtle effect: + +**Problem:** The experimental intervention may have a relatively subtle impact on gene expression, making it challenging to detect differential expression using default thresholds. + +**Suggested course of action:** Adjust the analysis parameters to improve sensitivity in capturing smaller changes in gene expression. Try reducing the `differential_min_fold_change` parameter to include genes with smaller fold changes. Additionally, consider increasing the `differential_max_qval` parameter to relax the significance threshold and capture a broader range of significant p-values or q-values. By fine-tuning these parameters, you increase the likelihood of identifying genes with subtle but biologically relevant changes in expression. + +#### 3. Genuinely no differential expression: + +**Problem:** It is possible that the experimental intervention has not significantly impacted gene expression, resulting in the absence of differentially expressed genes. + +**Suggested course of action:** Evaluate the experimental design and the perturbation itself. If the intervention is expected to induce changes in gene expression but no differential expression is observed, revisit the experimental design, biological perturbation, or underlying hypothesis. Consider reassessing the experimental conditions or exploring alternative approaches to investigate other aspects of the biological system. + +#### 4. Unaccounted sources of variance: + +**Problem:** Other factors outside the main treatment may introduce variance in gene expression, leading to a decrease in power to detect differential expression. + +**Suggested course of action:** Examine the PCA (Principal Component Analysis) and metadata association plots generated by the workflow. Identify variables associated with components that contribute significantly to the variance in your data. Include these variables as covariates in the contrasts table's blocking column to account for their effects on gene expression. By incorporating these unaccounted sources of variance into your analysis, you improve the accuracy and power to detect differential expression. + +#### 5. Biological complexity and pathway-level effects: + +**Problem:** The experimental intervention may not lead to observable differential expression at the individual gene level, but there may be coordinated changes at the pathway or functional level. 
+ +**Suggested course of action:** Utilize pathway analysis tools such as Gene Set Enrichment Analysis (GSEA), available in this workflow. These tools evaluate the enrichment of gene sets or functional annotations to identify broader biological processes influenced by the experimental intervention. By focusing on pathway-level analysis, you can capture the overall impact of the intervention on biological processes, even if differential expression at the individual gene level is not apparent. + +#### 6. Limited options for normalization: + +**Problem:** The nf-core differential abundance workflow currently offers a limited set of normalization methods, which may not fully address the specific normalization requirements of your experiment. + +**Suggested course of action:** If the existing options do not adequately address your experiment's normalization challenges, consider developing custom normalization modules tailored to your needs. By contributing these modules to the nf-core community, you can expand the range of normalization options available to researchers. Your contributions will help researchers in similar situations and contribute to the continuous improvement and customization of the workflow. + +#### 7. Technical variability and batch effects: + +**Problem:** Technical variability and batch effects can introduce noise and confound the detection of differential expression. + +**Suggested course of action:** Address technical variability and batch effects in the experimental design and data analysis. Randomize sample collection, incorporate control samples, and balance samples across different experimental batches. These measures minimize technical variation, enhance the robustness of the analysis, and increase the chances of detecting true differential expression. + +#### 8. Workflow issues or bugs: + +**Problem:** Potential issues or bugs in the nf-core differential abundance workflow can affect the detection of differential expression or data analysis. + +**Suggested course of action:** Report any issues or suspected bugs by opening an issue on the [nf-core differential abundance workflow repository](https://github.com/nf-core/differentialabundance). Provide specific details, such as software versions, error messages, and relevant data or code snippets. Your feedback is valuable for improving the workflow's reliability. If you have the technical expertise, consider contributing to the workflow by submitting pull requests to address issues, fix bugs, or propose enhancements. diff --git a/docs/usage.md b/docs/usage.md index 960c4cb6..9f998cbc 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -11,10 +11,10 @@ Differential analysis is a common task in a variety of use cases. In essence, al With the above in mind, running this workflow requires: - a set of abundance values. This can be: - - (for RNA-seq): a matrix of quantifications with observations by column and features by row + - (for RNA-seq or MaxQuant proteomics measurements): a matrix of quantifications with observations by column and features by row - (for Affymetrix microarrays): a tar'd archive of CEL files - a description of the observations such as a sample sheet from RNA-seq analysis -- a description of the features, for our initial RNA-seq application this can be simply the GTF file from which gene annotations can be derived. For Affymetrix arrays this can be derived from the array platform annotation package automatically. You can also supply your own table. 
+- a description of the features, for our initial RNA-seq application this can be simply the GTF file from which gene annotations can be derived. For Affymetrix arrays this can be derived from the array platform annotation package automatically. Skip for MaxQuant. You can also supply your own table. - a specification of how the matrix should be split, and how the resulting groups should be compared ## Observations (samplesheet) input @@ -39,6 +39,24 @@ TREATED_REP3,AEG588A2_S1_L004_R1_001.fastq.gz,AEG588A2_S1_L004_R2_001.fastq.gz,t The file can be tab or comma separated. +### Affymetrix arrays + +Abundances for Affy arrays are provided in CEL files within an archive. When creating sample sheets for Affy arrays, it's crucial to include a column that specifies which file corresponds to each sample. This file column is essential for linking each sample to its corresponding data file, as shown in the example below: + +``` +"file","id","name","patient","phenotype" +"GSM1229341_Gudjohnsson_001_6690_PP.CEL.gz","GSM1229341","p6690_PP","6690","lesional" +"GSM1229342_Gudjohnsson_002_6690_PN.CEL.gz","GSM1229342","p6690_PN","6690","uninvolved" +"GSM1229343_Gudjohnsson_003_7450_PN.CEL.gz","GSM1229343","p7450_PN","7450","uninvolved" +"GSM1229344_Gudjohnsson_004_7450_PP.CEL.gz","GSM1229344","p7450_PP","7450","lesional" +"GSM1229345_Gudjohnsson_005_7912_PP.CEL.gz","GSM1229345","p7912_PP","7912","lesional" +"GSM1229346_Gudjohnsson_006_7912_PN.CEL.gz","GSM1229346","p7912_PN","7912","uninvolved" +"GSM1229347_Gudjohnsson_007_8470_PP.CEL.gz","GSM1229347","p8470_PP","6690","lesional" +"GSM1229348_Gudjohnsson_008_8470_PN.CEL.gz","GSM1229348","p8470_PN","6690","uninvolved" +``` + +The "file" column in this example is used to specify the data file associated with each sample, which is essential for data analysis and interpretation. + ## Abundance values ### RNA-seq and similar @@ -49,6 +67,14 @@ The file can be tab or comma separated. This is a numeric square matrix file, comma or tab-separated, with a column for every observation, and features corresponding to the supplied feature set. The parameters `--observations_id_col` and `--features_id_col` define which of the associated fields should be matched in those inputs. +### MaxQuant intensities + +```bash +--matrix '[path to matrix file]' +``` + +This is the proteinGroups.txt file produced by MaxQuant. It is a tab-separated matrix file with a column for every observation (plus additional columns for other types of measurements and information); each row contains these data for a set of proteins. The parameters `--observations_id_col` and `--features_id_col` define which of the associated fields should be matched in those inputs. The parameter `--proteus_measurecol_prefix` defines which prefix is used to extract those matrix columns which contain the measurements to be used. For example, the default `LFQ intensity ` will indicate that columns like LFQ intensity S1, LFQ intensity S2, LFQ intensity S3 etc. are used (do not forget trailing whitespace in this parameter, if required!). + ### Affymetrix microarrays ```bash @@ -57,6 +83,22 @@ This is a numeric square matrix file, comma or tab-separated, with a column for This is an archive of CEL files as frequently found in GEO. +### Use SOFT matrices + +Alternatively, the user may want to work with SOFT matrices. 
In this case, setting
+
+`--study_type geo_soft_file` and `--querygse [GSE study ID]`
+
+enables the pipeline to download normalised SOFT matrices automatically (note that even though Affymetrix arrays are also supported in the SOFT matrix track, it is recommended to work from CEL files in this case).
+
+As for other platforms, you may subset the metadata features used in reporting. For example, for GPL570 (Affymetrix Plus 2.0 arrays) this could be done with
+
+```
+--features_metadata_cols ID,Entrez_Gene_ID,Symbol,Definition
+```
+
+The full list of feature metadata is available on the GEO platform pages.
+
 ## Contrasts file
 
 ```bash
 --contrasts '[path to contrasts file]'
 ```
@@ -93,7 +135,7 @@ The file can be tab or comma separated.
 --gtf '[path to gtf file]'
 ```
 
-This is usually the easiest way to supply annotations for RNA-seq features. It should match the GTF used in nf-core/rnaseq if that workflow was used to produce the input expression matrix.
+This is usually the easiest way to supply annotations for RNA-seq features. It should match the GTF used in nf-core/rnaseq if that workflow was used to produce the input expression matrix. Skip for MaxQuant.
 
 ### Annotation package identifiers for Affymetrix arrays
 
@@ -107,11 +149,40 @@ To override the above options, you may also supply your own features table as a
 --features '[path to features TSV]'
 ```
 
-By default, if you don't provide features, for non-array data the workflow will fall back to attempting to use the matrix itself as a source of feature annotations. For this to work you must make sure to set the `features_id_col`, `features_name_col` and `features_metadata_cols` parameters to the appropriate values, for example by setting them to 'gene_id' if that is the identifier column on the matrix. This will cause the gene ID to be used everywhere rather than more accessible gene symbols (as can be derived from the GTF), but the workflow should run.
+By default, if you don't provide features, for non-array data the workflow will fall back to attempting to use the matrix itself as a source of feature annotations. For this to work you must make sure to set the `features_id_col`, `features_name_col` and `features_metadata_cols` parameters to the appropriate values, for example by setting them to 'gene_id' if that is the identifier column on the matrix. This will cause the gene ID to be used everywhere rather than more accessible gene symbols (as can be derived from the GTF), but the workflow should run. Please use this option for MaxQuant analysis, i.e. do not provide features.
+
+## Working with the output R markdown file
+
+The pipeline produces an R markdown file which, if you're proficient in R, you can use to tweak the report after it's generated (**note**: if you need the same customisations repeatedly, we would recommend you supply your own template using the `report_file` parameter).
+
+To work with R markdown files you will need RStudio. You will also need the ShinyNGS R package [installed](https://github.com/pinin4fjords/shinyngs#installation), since it supplies many of the accessory plotting functions you will need. The exact way you do this may depend on your system, but for example:
+
+### 1. Create a conda environment with ShinyNGS and activate it
+
+```bash
+conda create -n shinyngs r-shinyngs
+conda activate shinyngs
+```
+
+### 2. 
Open RStudio from this environment + +For example, on a Mac Terminal: + +```bash +open -na Rstudio +``` + +Now, unzip the report archive, and in RStudio change directory to that location: + +``` +setwd("/path/to/unzipped/directory") +``` + +Now open the R Markdown file from the RStudio UI, and you should have everything you need to run the various code segments and render the whole document to HTML again if you wish. ## Shiny app generation -The pipeline is capable of building, and even deploying (to [shinyapps.io](https://www.shinyapps.io/)) for you a Shiny app built with [ShinyNGS](https://github.com/pinin4fjords/shinyngs). +The pipeline is capable of building, and even deploying (to [shinyapps.io](https://www.shinyapps.io/)) for you a Shiny app built with [ShinyNGS](https://github.com/pinin4fjords/shinyngs). There is a basic example running [here](https://pinin4fjords.shinyapps.io/tester/) which shows what this might look like. This is enabled with: @@ -181,7 +252,7 @@ The typical command for running the pipeline is as follows: ```bash nextflow run nf-core/differentialabundance \ - [--profile rnaseq OR -profile affy] \ + [-profile rnaseq OR -profile affy] \ --input samplesheet.csv \ --contrasts contrasts.csv \ [--matrix assay_matrix.tsv OR --affy_cel_files_archive cel_files.tar] \ @@ -201,9 +272,37 @@ work # Directory containing the nextflow working files # Other nextflow hidden files, eg. history of pipeline runs and old logs. ``` -## Hints and tips +### Hints and tips - If you don't like the colors used in the report, try a different `RColorBrewer` palette by changing the `exploratory_palette_name` and/or `differential_palette_name` parameters. +- In rare cases, some users have reported issues with DESeq2 using all available cores on a machine, rather than those specified in the process configuration. This can be prevented by setting the `OPENBLAS_NUM_THREADS` environment variable. + +### Params files + +If you wish to repeatedly use the same parameters for multiple runs, rather than specifying each flag in the command, you can specify these in a params file. + +Pipeline settings can be provided in a `yaml` or `json` file via `-params-file `. + +:::warning +Do not use `-c ` to specify parameters as this will result in errors. Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such as output directories), or module arguments (args). +::: + +The above pipeline run specified with a params file in yaml format: + +```bash +nextflow run nf-core/differentialabundance -profile docker -params-file params.yaml +``` + +with `params.yaml` containing: + +```yaml +input: './samplesheet.csv' +outdir: './results/' +genome: 'GRCh37' +<...> +``` + +You can also generate such `YAML`/`JSON` files via [nf-core/launch](https://nf-co.re/launch). ### Updating the pipeline @@ -221,17 +320,27 @@ First, go to the [nf-core/differentialabundance releases page](https://github.co This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. For example, at the bottom of the MultiQC reports. +To further assist in reproducibility, you can use share and re-use [parameter files](#running-the-pipeline) to repeat pipeline runs with the same settings without having to write out a command with every single parameter. 
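A minimal sketch of that combination, with the release tag purely illustrative:

```bash
# Pin a specific pipeline release and re-use a shared params file
nextflow run nf-core/differentialabundance -r 1.2.0 -profile docker -params-file params.yaml
```
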
+ +:::tip +If you wish to share such profile (such as upload as supplementary material for academic publications), make sure to NOT include cluster specific paths to files, nor institutional specific profiles. +::: + ## Core Nextflow arguments -> **NB:** These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen). +:::note +These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen). +::: ### `-profile` Use this parameter to choose a configuration profile. Profiles can give configuration presets for different compute environments. -Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Conda) - see below. +Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Apptainer, Conda) - see below. -> We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported. +:::info +We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported. +::: The pipeline also dynamically loads configurations from [https://github.com/nf-core/configs](https://github.com/nf-core/configs) when it runs, making multiple config profiles for various institutional clusters available at run time. For more information and to see if your system is available in these configs please see the [nf-core/configs documentation](https://github.com/nf-core/configs#documentation). @@ -253,8 +362,10 @@ If `-profile` is not specified, the pipeline will run locally and expect all sof - A generic configuration profile to be used with [Shifter](https://nersc.gitlab.io/development/shifter/how-to-use/) - `charliecloud` - A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/) +- `apptainer` + - A generic configuration profile to be used with [Apptainer](https://apptainer.org/) - `conda` - - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter or Charliecloud. + - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter, Charliecloud, or Apptainer. ### `-resume` @@ -298,71 +409,19 @@ Work dir: Tip: you can replicate the issue by changing to the process work dir and entering the command `bash .command.run` ``` -#### For beginners - -A first step to bypass this error, you could try to increase the amount of CPUs, memory, and time for the whole pipeline. Therefor you can try to increase the resource for the parameters `--max_cpus`, `--max_memory`, and `--max_time`. Based on the error above, you have to increase the amount of memory. Therefore you can go to the [parameter documentation of differentialabundance](https://nf-co.re/differentialabundance/1.0.0/parameters) and scroll down to the `show hidden parameter` button to get the default value for `--max_memory`. 
In this case 128GB, you than can try to run your pipeline again with `--max_memory 200GB -resume` to skip all process, that were already calculated. If you can not increase the resource of the complete pipeline, you can try to adapt the resource for a single process as mentioned below. - -#### Advanced option on process level - -To bypass this error you would need to find exactly which resources are set by the `DESEQ2_DIFFERENTIAL` process. The quickest way is to search for `process DESEQ2_DIFFERENTIAL` in the [nf-core/differentialabundance Github repo](https://github.com/nf-core/differentialabundance/search?q=process+DESEQ2_DIFFERENTIAL). -We have standardised the structure of Nextflow DSL2 pipelines such that all module files will be present in the `modules/` directory and so, based on the search results, the file we want is `modules/nf-core/deseq2/differential/main.nf`. -If you click on the link to that file you will notice that there is a `label` directive at the top of the module that is set to [`label process_medium`](https://github.com/nf-core/differentialabundance/blob/0c0e457d88a33baeb3061114978eebee6cceb46c/modules/nf-core/deseq2/differential/main.nf#L3). -The [Nextflow `label`](https://www.nextflow.io/docs/latest/process.html#label) directive allows us to organise workflow processes in separate groups which can be referenced in a configuration file to select and configure subset of processes having similar computing requirements. -The default values for the `process_medium` label are set in the pipeline's [`base.config`](https://github.com/nf-core/differentialabundance/blob/0c0e457d88a33baeb3061114978eebee6cceb46c/conf/base.config#L37) which in this case is defined as 2GB. -Providing you haven't set any other standard nf-core parameters to **cap** the [maximum resources](https://nf-co.re/usage/configuration#max-resources) used by the pipeline then we can try and bypass the `DESEQ2_DIFFERENTIAL` process failure by creating a custom config file that sets a higher memory. -The custom config below can then be provided to the pipeline via the [`-c`](#-c) parameter as highlighted in previous sections. - -```nextflow -process { - withName: 'NFCORE_DIFFERENTIALABUNDANCE:DIFFERENTIALABUNDANCE::DESEQ2_DIFFERENTIAL' { - memory = 20.GB - } -} -``` - -> **NB:** We specify the full process name i.e. `NFCORE_DIFFERENTIALABUNDANCE:DIFFERENTIALABUNDANCE::DESEQ2_DIFFERENTIAL` in the config file because this takes priority over the short name (`DESEQ2_DIFFERENTIAL`) and allows existing configuration using the full process name to be correctly overridden. -> -> If you get a warning suggesting that the process selector isn't recognised check that the process name has been specified correctly. - -### Updating containers (advanced users) - -The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. If for some reason you need to use a different version of a particular tool with the pipeline then you just need to identify the `process` name and override the Nextflow `container` definition for that process using the `withName` declaration. For example, in the [nf-core/viralrecon](https://nf-co.re/viralrecon) pipeline a tool called [Pangolin](https://github.com/cov-lineages/pangolin) has been used during the COVID-19 pandemic to assign lineages to SARS-CoV-2 genome sequenced samples. 
Given that the lineage assignments change quite frequently it doesn't make sense to re-release the nf-core/viralrecon everytime a new version of Pangolin has been released. However, you can override the default container used by the pipeline by creating a custom config file and passing it as a command-line argument via `-c custom.config`. - -1. Check the default version used by the pipeline in the module file for [Pangolin](https://github.com/nf-core/viralrecon/blob/a85d5969f9025409e3618d6c280ef15ce417df65/modules/nf-core/software/pangolin/main.nf#L14-L19) -2. Find the latest version of the Biocontainer available on [Quay.io](https://quay.io/repository/biocontainers/pangolin?tag=latest&tab=tags) -3. Create the custom config accordingly: - - - For Docker: +To change the resource requests, please see the [max resources](https://nf-co.re/docs/usage/configuration#max-resources) and [tuning workflow resources](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources) section of the nf-core website. - ```nextflow - process { - withName: PANGOLIN { - container = 'quay.io/biocontainers/pangolin:3.0.5--pyhdfd78af_0' - } - } - ``` +### Custom Containers - - For Singularity: +In some cases you may wish to change which container or conda environment a step of the pipeline uses for a particular tool. By default nf-core pipelines use containers and software from the [biocontainers](https://biocontainers.pro/) or [bioconda](https://bioconda.github.io/) projects. However, in some cases the version specified by the pipeline may be out of date. - ```nextflow - process { - withName: PANGOLIN { - container = 'https://depot.galaxyproject.org/singularity/pangolin:3.0.5--pyhdfd78af_0' - } - } - ``` +To use a different container from the default container or conda environment specified in a pipeline, please see the [updating tool versions](https://nf-co.re/docs/usage/configuration#updating-tool-versions) section of the nf-core website. - - For Conda: +### Custom Tool Arguments - ```nextflow - process { - withName: PANGOLIN { - conda = 'bioconda::pangolin=3.0.5' - } - } - ``` +A pipeline might not always support every possible argument or option of a particular tool used in the pipeline. Fortunately, nf-core pipelines provide some freedom to users to insert additional parameters that the pipeline does not include by default. -> **NB:** If you wish to periodically update individual tool-specific results (e.g. Pangolin) generated by the pipeline then you must ensure to keep the `work/` directory otherwise the `-resume` ability of the pipeline will be compromised and it will restart from scratch. +To learn how to provide additional arguments to a particular tool of the pipeline, please see the [customising tool arguments](https://nf-co.re/docs/usage/configuration#customising-tool-arguments) section of the nf-core website. ### nf-core/configs diff --git a/lib/NfcoreSchema.groovy b/lib/NfcoreSchema.groovy deleted file mode 100755 index 33cd4f6e..00000000 --- a/lib/NfcoreSchema.groovy +++ /dev/null @@ -1,528 +0,0 @@ -// -// This file holds several functions used to perform JSON parameter validation, help and summary rendering for the nf-core pipeline template.
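To make the website links above concrete, here is a minimal sketch of a custom config, passed to the pipeline via `-c custom.config`, combining a resource increase, a container override, and extra tool arguments for a single process. The `DESEQ2_DIFFERENTIAL` selector and the container tag reuse names that appear elsewhere in this changeset; the memory value and the `--lfc_threshold` argument are illustrative assumptions, not recommended defaults:

```nextflow
process {
    withName: 'NFCORE_DIFFERENTIALABUNDANCE:DIFFERENTIALABUNDANCE:DESEQ2_DIFFERENTIAL' {
        // Raise the memory available to this one process
        memory = 20.GB
        // Pin or swap the tool container (Docker form shown)
        container = 'biocontainers/bioconductor-deseq2:1.34.0--r41hc247a5b_3'
        // Forward additional arguments to the underlying script via task.ext.args
        // (--lfc_threshold 1 is a hypothetical example value)
        ext.args = '--lfc_threshold 1'
    }
}
```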
-// - -import org.everit.json.schema.Schema -import org.everit.json.schema.loader.SchemaLoader -import org.everit.json.schema.ValidationException -import org.json.JSONObject -import org.json.JSONTokener -import org.json.JSONArray -import groovy.json.JsonSlurper -import groovy.json.JsonBuilder - -class NfcoreSchema { - - // - // Resolve Schema path relative to main workflow directory - // - public static String getSchemaPath(workflow, schema_filename='nextflow_schema.json') { - return "${workflow.projectDir}/${schema_filename}" - } - - // - // Function to loop over all parameters defined in schema and check - // whether the given parameters adhere to the specifications - // - /* groovylint-disable-next-line UnusedPrivateMethodParameter */ - public static void validateParameters(workflow, params, log, schema_filename='nextflow_schema.json') { - def has_error = false - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// - // Check for nextflow core params and unexpected params - def json = new File(getSchemaPath(workflow, schema_filename=schema_filename)).text - def Map schemaParams = (Map) new JsonSlurper().parseText(json).get('definitions') - def nf_params = [ - // Options for base `nextflow` command - 'bg', - 'c', - 'C', - 'config', - 'd', - 'D', - 'dockerize', - 'h', - 'log', - 'q', - 'quiet', - 'syslog', - 'v', - - // Options for `nextflow run` command - 'ansi', - 'ansi-log', - 'bg', - 'bucket-dir', - 'c', - 'cache', - 'config', - 'dsl2', - 'dump-channels', - 'dump-hashes', - 'E', - 'entry', - 'latest', - 'lib', - 'main-script', - 'N', - 'name', - 'offline', - 'params-file', - 'pi', - 'plugins', - 'poll-interval', - 'pool-size', - 'profile', - 'ps', - 'qs', - 'queue-size', - 'r', - 'resume', - 'revision', - 'stdin', - 'stub', - 'stub-run', - 'test', - 'w', - 'with-charliecloud', - 'with-conda', - 'with-dag', - 'with-docker', - 'with-mpi', - 'with-notification', - 'with-podman', - 'with-report', - 'with-singularity', - 'with-timeline', - 'with-tower', - 'with-trace', - 'with-weblog', - 'without-docker', - 'without-podman', - 'work-dir' - ] - def unexpectedParams = [] - - // Collect expected parameters from the schema - def expectedParams = [] - def enums = [:] - for (group in schemaParams) { - for (p in group.value['properties']) { - expectedParams.push(p.key) - if (group.value['properties'][p.key].containsKey('enum')) { - enums[p.key] = group.value['properties'][p.key]['enum'] - } - } - } - - for (specifiedParam in params.keySet()) { - // nextflow params - if (nf_params.contains(specifiedParam)) { - log.error "ERROR: You used a core Nextflow option with two hyphens: '--${specifiedParam}'. 
Please resubmit with '-${specifiedParam}'" - has_error = true - } - // unexpected params - def params_ignore = params.schema_ignore_params.split(',') + 'schema_ignore_params' - def expectedParamsLowerCase = expectedParams.collect{ it.replace("-", "").toLowerCase() } - def specifiedParamLowerCase = specifiedParam.replace("-", "").toLowerCase() - def isCamelCaseBug = (specifiedParam.contains("-") && !expectedParams.contains(specifiedParam) && expectedParamsLowerCase.contains(specifiedParamLowerCase)) - if (!expectedParams.contains(specifiedParam) && !params_ignore.contains(specifiedParam) && !isCamelCaseBug) { - // Temporarily remove camelCase/camel-case params #1035 - def unexpectedParamsLowerCase = unexpectedParams.collect{ it.replace("-", "").toLowerCase()} - if (!unexpectedParamsLowerCase.contains(specifiedParamLowerCase)){ - unexpectedParams.push(specifiedParam) - } - } - } - - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// - // Validate parameters against the schema - InputStream input_stream = new File(getSchemaPath(workflow, schema_filename=schema_filename)).newInputStream() - JSONObject raw_schema = new JSONObject(new JSONTokener(input_stream)) - - // Remove anything that's in params.schema_ignore_params - raw_schema = removeIgnoredParams(raw_schema, params) - - Schema schema = SchemaLoader.load(raw_schema) - - // Clean the parameters - def cleanedParams = cleanParameters(params) - - // Convert to JSONObject - def jsonParams = new JsonBuilder(cleanedParams) - JSONObject params_json = new JSONObject(jsonParams.toString()) - - // Validate - try { - schema.validate(params_json) - } catch (ValidationException e) { - println '' - log.error 'ERROR: Validation of pipeline parameters failed!' - JSONObject exceptionJSON = e.toJSON() - printExceptions(exceptionJSON, params_json, log, enums) - println '' - has_error = true - } - - // Check for unexpected parameters - if (unexpectedParams.size() > 0) { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - println '' - def warn_msg = 'Found unexpected parameters:' - for (unexpectedParam in unexpectedParams) { - warn_msg = warn_msg + "\n* --${unexpectedParam}: ${params[unexpectedParam].toString()}" - } - log.warn warn_msg - log.info "- ${colors.dim}Ignore this warning: params.schema_ignore_params = \"${unexpectedParams.join(',')}\" ${colors.reset}" - println '' - } - - if (has_error) { - System.exit(1) - } - } - - // - // Beautify parameters for --help - // - public static String paramsHelp(workflow, params, command, schema_filename='nextflow_schema.json') { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - Integer num_hidden = 0 - String output = '' - output += 'Typical pipeline command:\n\n' - output += " ${colors.cyan}${command}${colors.reset}\n\n" - Map params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) - Integer max_chars = paramsMaxChars(params_map) + 1 - Integer desc_indent = max_chars + 14 - Integer dec_linewidth = 160 - desc_indent - for (group in params_map.keySet()) { - Integer num_params = 0 - String group_output = colors.underlined + colors.bold + group + colors.reset + '\n' - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (group_params.get(param).hidden && !params.show_hidden_params) { - num_hidden += 1 - continue; - } - def type = '[' + group_params.get(param).type + ']' - def description = group_params.get(param).description - def defaultValue = 
group_params.get(param).default != null ? " [default: " + group_params.get(param).default.toString() + "]" : '' - def description_default = description + colors.dim + defaultValue + colors.reset - // Wrap long description texts - // Loosely based on https://dzone.com/articles/groovy-plain-text-word-wrap - if (description_default.length() > dec_linewidth){ - List olines = [] - String oline = "" // " " * indent - description_default.split(" ").each() { wrd -> - if ((oline.size() + wrd.size()) <= dec_linewidth) { - oline += wrd + " " - } else { - olines += oline - oline = wrd + " " - } - } - olines += oline - description_default = olines.join("\n" + " " * desc_indent) - } - group_output += " --" + param.padRight(max_chars) + colors.dim + type.padRight(10) + colors.reset + description_default + '\n' - num_params += 1 - } - group_output += '\n' - if (num_params > 0){ - output += group_output - } - } - if (num_hidden > 0){ - output += colors.dim + "!! Hiding $num_hidden params, use --show_hidden_params to show them !!\n" + colors.reset - } - output += NfcoreTemplate.dashedLine(params.monochrome_logs) - return output - } - - // - // Groovy Map summarising parameters/workflow options used by the pipeline - // - public static LinkedHashMap paramsSummaryMap(workflow, params, schema_filename='nextflow_schema.json') { - // Get a selection of core Nextflow workflow options - def Map workflow_summary = [:] - if (workflow.revision) { - workflow_summary['revision'] = workflow.revision - } - workflow_summary['runName'] = workflow.runName - if (workflow.containerEngine) { - workflow_summary['containerEngine'] = workflow.containerEngine - } - if (workflow.container) { - workflow_summary['container'] = workflow.container - } - workflow_summary['launchDir'] = workflow.launchDir - workflow_summary['workDir'] = workflow.workDir - workflow_summary['projectDir'] = workflow.projectDir - workflow_summary['userName'] = workflow.userName - workflow_summary['profile'] = workflow.profile - workflow_summary['configFiles'] = workflow.configFiles.join(', ') - - // Get pipeline parameters defined in JSON Schema - def Map params_summary = [:] - def params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) - for (group in params_map.keySet()) { - def sub_params = new LinkedHashMap() - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (params.containsKey(param)) { - def params_value = params.get(param) - def schema_value = group_params.get(param).default - def param_type = group_params.get(param).type - if (schema_value != null) { - if (param_type == 'string') { - if (schema_value.contains('$projectDir') || schema_value.contains('${projectDir}')) { - def sub_string = schema_value.replace('\$projectDir', '') - sub_string = sub_string.replace('\${projectDir}', '') - if (params_value.contains(sub_string)) { - schema_value = params_value - } - } - if (schema_value.contains('$params.outdir') || schema_value.contains('${params.outdir}')) { - def sub_string = schema_value.replace('\$params.outdir', '') - sub_string = sub_string.replace('\${params.outdir}', '') - if ("${params.outdir}${sub_string}" == params_value) { - schema_value = params_value - } - } - } - } - - // We have a default in the schema, and this isn't it - if (schema_value != null && params_value != schema_value) { - sub_params.put(param, params_value) - } - // No default in the schema, and this isn't empty - else if (schema_value == null && params_value != 
"" && params_value != null && params_value != false) { - sub_params.put(param, params_value) - } - } - } - params_summary.put(group, sub_params) - } - return [ 'Core Nextflow options' : workflow_summary ] << params_summary - } - - // - // Beautify parameters for summary and return as string - // - public static String paramsSummaryLog(workflow, params) { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - String output = '' - def params_map = paramsSummaryMap(workflow, params) - def max_chars = paramsMaxChars(params_map) - for (group in params_map.keySet()) { - def group_params = params_map.get(group) // This gets the parameters of that particular group - if (group_params) { - output += colors.bold + group + colors.reset + '\n' - for (param in group_params.keySet()) { - output += " " + colors.blue + param.padRight(max_chars) + ": " + colors.green + group_params.get(param) + colors.reset + '\n' - } - output += '\n' - } - } - output += "!! Only displaying parameters that differ from the pipeline defaults !!\n" - output += NfcoreTemplate.dashedLine(params.monochrome_logs) - return output - } - - // - // Loop over nested exceptions and print the causingException - // - private static void printExceptions(ex_json, params_json, log, enums, limit=5) { - def causingExceptions = ex_json['causingExceptions'] - if (causingExceptions.length() == 0) { - def m = ex_json['message'] =~ /required key \[([^\]]+)\] not found/ - // Missing required param - if (m.matches()) { - log.error "* Missing required parameter: --${m[0][1]}" - } - // Other base-level error - else if (ex_json['pointerToViolation'] == '#') { - log.error "* ${ex_json['message']}" - } - // Error with specific param - else { - def param = ex_json['pointerToViolation'] - ~/^#\// - def param_val = params_json[param].toString() - if (enums.containsKey(param)) { - def error_msg = "* --${param}: '${param_val}' is not a valid choice (Available choices" - if (enums[param].size() > limit) { - log.error "${error_msg} (${limit} of ${enums[param].size()}): ${enums[param][0..limit-1].join(', ')}, ... 
)" - } else { - log.error "${error_msg}: ${enums[param].join(', ')})" - } - } else { - log.error "* --${param}: ${ex_json['message']} (${param_val})" - } - } - } - for (ex in causingExceptions) { - printExceptions(ex, params_json, log, enums) - } - } - - // - // Remove an element from a JSONArray - // - private static JSONArray removeElement(json_array, element) { - def list = [] - int len = json_array.length() - for (int i=0;i - if(raw_schema.keySet().contains('definitions')){ - raw_schema.definitions.each { definition -> - for (key in definition.keySet()){ - if (definition[key].get("properties").keySet().contains(ignore_param)){ - // Remove the param to ignore - definition[key].get("properties").remove(ignore_param) - // If the param was required, change this - if (definition[key].has("required")) { - def cleaned_required = removeElement(definition[key].required, ignore_param) - definition[key].put("required", cleaned_required) - } - } - } - } - } - if(raw_schema.keySet().contains('properties') && raw_schema.get('properties').keySet().contains(ignore_param)) { - raw_schema.get("properties").remove(ignore_param) - } - if(raw_schema.keySet().contains('required') && raw_schema.required.contains(ignore_param)) { - def cleaned_required = removeElement(raw_schema.required, ignore_param) - raw_schema.put("required", cleaned_required) - } - } - return raw_schema - } - - // - // Clean and check parameters relative to Nextflow native classes - // - private static Map cleanParameters(params) { - def new_params = params.getClass().newInstance(params) - for (p in params) { - // remove anything evaluating to false - if (!p['value']) { - new_params.remove(p.key) - } - // Cast MemoryUnit to String - if (p['value'].getClass() == nextflow.util.MemoryUnit) { - new_params.replace(p.key, p['value'].toString()) - } - // Cast Duration to String - if (p['value'].getClass() == nextflow.util.Duration) { - new_params.replace(p.key, p['value'].toString().replaceFirst(/d(?!\S)/, "day")) - } - // Cast LinkedHashMap to String - if (p['value'].getClass() == LinkedHashMap) { - new_params.replace(p.key, p['value'].toString()) - } - } - return new_params - } - - // - // This function tries to read a JSON params file - // - private static LinkedHashMap paramsLoad(String json_schema) { - def params_map = new LinkedHashMap() - try { - params_map = paramsRead(json_schema) - } catch (Exception e) { - println "Could not read parameters settings from JSON. $e" - params_map = new LinkedHashMap() - } - return params_map - } - - // - // Method to actually read in JSON file using Groovy. - // Group (as Key), values are all parameters - // - Parameter1 as Key, Description as Value - // - Parameter2 as Key, Description as Value - // .... 
- // Group - // - - private static LinkedHashMap paramsRead(String json_schema) throws Exception { - def json = new File(json_schema).text - def Map schema_definitions = (Map) new JsonSlurper().parseText(json).get('definitions') - def Map schema_properties = (Map) new JsonSlurper().parseText(json).get('properties') - /* Tree looks like this in nf-core schema - * definitions <- this is what the first get('definitions') gets us - group 1 - title - description - properties - parameter 1 - type - description - parameter 2 - type - description - group 2 - title - description - properties - parameter 1 - type - description - * properties <- parameters can also be ungrouped, outside of definitions - parameter 1 - type - description - */ - - // Grouped params - def params_map = new LinkedHashMap() - schema_definitions.each { key, val -> - def Map group = schema_definitions."$key".properties // Gets the property object of the group - def title = schema_definitions."$key".title - def sub_params = new LinkedHashMap() - group.each { innerkey, value -> - sub_params.put(innerkey, value) - } - params_map.put(title, sub_params) - } - - // Ungrouped params - def ungrouped_params = new LinkedHashMap() - schema_properties.each { innerkey, value -> - ungrouped_params.put(innerkey, value) - } - params_map.put("Other parameters", ungrouped_params) - - return params_map - } - - // - // Get maximum number of characters across all parameter names - // - private static Integer paramsMaxChars(params_map) { - Integer max_chars = 0 - for (group in params_map.keySet()) { - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (param.size() > max_chars) { - max_chars = param.size() - } - } - } - return max_chars - } -} diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy index 25a0a74a..01b8653d 100755 --- a/lib/NfcoreTemplate.groovy +++ b/lib/NfcoreTemplate.groovy @@ -3,6 +3,7 @@ // import org.yaml.snakeyaml.Yaml +import groovy.json.JsonOutput class NfcoreTemplate { @@ -128,7 +129,7 @@ class NfcoreTemplate { def email_html = html_template.toString() // Render the sendmail template - def max_multiqc_email_size = params.max_multiqc_email_size as nextflow.util.MemoryUnit + def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? 
params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] def sf = new File("$projectDir/assets/sendmail_template.txt") def sendmail_template = engine.createTemplate(sf).make(smail_fields) @@ -222,6 +223,21 @@ class NfcoreTemplate { } } + // + // Dump pipeline parameters in a json file + // + public static void dump_parameters(workflow, params) { + def output_d = new File("${params.outdir}/pipeline_info/") + if (!output_d.exists()) { + output_d.mkdirs() + } + + def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') + def output_pf = new File(output_d, "params_${timestamp}.json") + def jsonStr = JsonOutput.toJson(params) + output_pf.text = JsonOutput.prettyPrint(jsonStr) + } + // // Print pipeline summary on completion // diff --git a/lib/WorkflowDifferentialabundance.groovy b/lib/WorkflowDifferentialabundance.groovy index 435253f3..4acad311 100755 --- a/lib/WorkflowDifferentialabundance.groovy +++ b/lib/WorkflowDifferentialabundance.groovy @@ -2,6 +2,7 @@ // This file holds several functions specific to the workflow/differentialabundance.nf in the nf-core/differentialabundance pipeline // +import nextflow.Nextflow import groovy.text.SimpleTemplateEngine class WorkflowDifferentialabundance { @@ -10,6 +11,7 @@ class WorkflowDifferentialabundance { // Check and validate parameters // public static void initialise(params, log) { + genomeExistsError(params, log) } @@ -40,32 +42,82 @@ class WorkflowDifferentialabundance { return yaml_file_text } - public static String methodsDescriptionText(run_workflow, mqc_methods_yaml) { + // + // Generate methods description for MultiQC + // + + public static String toolCitationText(params) { + def citation_text = [ + "Tools used in the workflow included:", + params["study_type"] == 'affy_array' ? "affy (Gautier et al. 2004)," : "", + params["study_type"] == 'rnaseq' ? "DESeq2 (Love et al. 2014)," : "", + "ggplot2 (Wickham 2016),", + "GEOquery (Davis et al. 2007),", + params["study_type"] != 'rnaseq' ? "Limma (Ritchie et al. 2015)," : "", + "optparse (Davis 2018),", + "plotly (Sievert 2020),", + params["study_type"] == 'maxquant' ? "Proteus (Gierlinski 2018)," : "", + "RColorBrewer (Neuwirth 2014),", + "RMarkdown (Allaire et al. 2022),", + "shinyngs (Manning 2022),", + "SummarizedExperiment (Morgan et al. 2020)", + "." + ].join(' ').trim() + + return citation_text + } + + public static String toolBibliographyText(params) { + + // TODO Optionally add bibliographic entries to this list. + // Can use ternary operators to dynamically construct based on conditions, e.g. params["run_xyz"] ? "<li>Author (2023) Pub name, Journal, DOI</li>" : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def reference_text = [ + "<li>Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/.</li>", + "<li>Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics, 32(19), 3047–3048. doi: 10.1093/bioinformatics/btw354</li>" + ].join(' ').trim() + + return reference_text + } + + public static String methodsDescriptionText(run_workflow, mqc_methods_yaml, params) { // Convert to a named map so can be used with familiar NXF ${workflow} variable syntax in the MultiQC YML file def meta = [:] meta.workflow = run_workflow.toMap() meta["manifest_map"] = run_workflow.manifest.toMap() + // Pipeline DOI meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : "" meta["nodoi_text"] = meta.manifest_map.doi ? "": "<li>If available, make sure to update the text to include the Zenodo DOI of the version of the pipeline used.</li>" + // Tool references + meta["tool_citations"] = "" + meta["tool_bibliography"] = "" + + // TODO Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled! + //meta["tool_citations"] = toolCitationText(params).replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") + //meta["tool_bibliography"] = toolBibliographyText(params) + + def methods_text = mqc_methods_yaml.text def engine = new SimpleTemplateEngine() def description_html = engine.createTemplate(methods_text).make(meta) return description_html - }// + } + + // // Exit pipeline if incorrect --genome key provided // private static void genomeExistsError(params, log) { if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { - log.error "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + " Currently, the available genome keys are:\n" + " ${params.genomes.keySet().join(", ")}\n" + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - System.exit(1) + Nextflow.error(error_string) } } } diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy index 9b44627f..48ae8ce2 100755 --- a/lib/WorkflowMain.groovy +++ b/lib/WorkflowMain.groovy @@ -2,6 +2,8 @@ // This file holds several functions specific to the main.nf workflow in the nf-core/differentialabundance pipeline // +import nextflow.Nextflow + class WorkflowMain { // @@ -17,40 +19,10 @@ class WorkflowMain { " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" } - // - // Generate help string - // - public static String help(workflow, params, log) { - def command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --genome GRCh37 -profile docker --contrasts contrasts.csv --matrix matrix.tsv" - def help_string = '' - help_string += NfcoreTemplate.logo(workflow, params.monochrome_logs) - help_string += NfcoreSchema.paramsHelp(workflow, params, command) - help_string += '\n' + citation(workflow) + '\n' - help_string += NfcoreTemplate.dashedLine(params.monochrome_logs) - return help_string - } - - // - // Generate parameter summary log string - // - public static String paramsSummaryLog(workflow, params, log) { - def summary_log = '' - summary_log += NfcoreTemplate.logo(workflow, params.monochrome_logs) - summary_log += NfcoreSchema.paramsSummaryLog(workflow, params) - summary_log += '\n' + citation(workflow) + '\n' - summary_log += NfcoreTemplate.dashedLine(params.monochrome_logs) - return summary_log - } - // // Validate parameters and print summary to screen // public static void initialise(workflow, params, log) { - // Print help to screen if required - if (params.help) { - log.info help(workflow, params, log) - System.exit(0) - } // Print workflow version and exit on --version if (params.version) { @@ -59,14 +31,6 @@ class WorkflowMain { System.exit(0) } - // Print parameter summary log to screen - log.info paramsSummaryLog(workflow, params, log) - - // Validate workflow parameters via the JSON schema - if (params.validate_params) { - NfcoreSchema.validateParameters(workflow, params, log) - } - // Check that a -profile or Nextflow config has been provided to run the pipeline NfcoreTemplate.checkConfigProvided(workflow, log) @@ -80,8 +44,7 @@ class WorkflowMain { // Check input has been provided if (!params.input)
{ - log.error "Please provide an input samplesheet to the pipeline e.g. '--input samplesheet.csv'" - System.exit(1) + Nextflow.error("Please provide an input samplesheet to the pipeline e.g. '--input samplesheet.csv'") } } // diff --git a/main.nf b/main.nf index c5f11551..28432388 100644 --- a/main.nf +++ b/main.nf @@ -4,7 +4,6 @@ nf-core/differentialabundance ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Github : https://github.com/nf-core/differentialabundance - Website: https://nf-co.re/differentialabundance Slack : https://nfcore.slack.com/channels/differentialabundance ---------------------------------------------------------------------------------------- @@ -26,6 +25,22 @@ params.gtf = WorkflowMain.getGenomeAttribute(params, 'gtf') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +include { validateParameters; paramsHelp } from 'plugin/nf-validation' + +// Print help message if needed +if (params.help) { + def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) + def citation = '\n' + WorkflowMain.citation(workflow) + '\n' + def String command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --genome GRCh37 -profile docker" + log.info logo + paramsHelp(command) + citation + NfcoreTemplate.dashedLine(params.monochrome_logs) + System.exit(0) +} + +// Validate input parameters +if (params.validate_params) { + validateParameters() +} + WorkflowMain.initialise(workflow, params, log) /* diff --git a/modules.json b/modules.json index 8079b287..ff601367 100644 --- a/modules.json +++ b/modules.json @@ -7,83 +7,97 @@ "nf-core": { "affy/justrma": { "branch": "master", - "git_sha": "960cc23c7bb97ff718e162efeefdd842fa8f5c70", + "git_sha": "d0b4fc03af52a1cc8c6fb4493b921b57352b1dd8", "installed_by": ["modules"] }, "atlasgeneannotationmanipulation/gtf2featureannotation": { "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, "custom/dumpsoftwareversions": { "branch": "master", - "git_sha": "7101db4432d3268b7fcb5b8f75fa0a022dc5561b", + "git_sha": "4125fb0c2152efbcec8d9ed71a756c9274b2f7f5", "installed_by": ["modules"] }, "custom/matrixfilter": { "branch": "master", - "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", + "git_sha": "e432a05dc59832d55307442264ad2b70039f5e3a", "installed_by": ["modules"] }, "custom/tabulartogseacls": { "branch": "master", - "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", + "git_sha": "5c460c5a4736974abde2843294f35307ee2b0e5e", "installed_by": ["modules"] }, "custom/tabulartogseagct": { "branch": "master", - "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", + "git_sha": "5c460c5a4736974abde2843294f35307ee2b0e5e", "installed_by": ["modules"] }, "deseq2/differential": { "branch": "master", - "git_sha": "e05db1b791ba8202853f275438fcc67c563ca479", + "git_sha": "d0b4fc03af52a1cc8c6fb4493b921b57352b1dd8", + "installed_by": ["modules"] + }, + "geoquery/getgeo": { + "branch": "master", + "git_sha": "6814b0659c51e447684a58c2b834a9f3b530540d", "installed_by": ["modules"] }, "gsea/gsea": { "branch": "master", - "git_sha": "595f49842df5f8791b1dac9f5feabb56813facc5", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, "gunzip": { "branch": "master", - "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", + "git_sha": "8663c9d215573d4bd0417c93dbc310aa9e6720a4", "installed_by": ["modules"] }, "limma/differential": 
{ "branch": "master", - "git_sha": "4805d97c29f1a3cdfc26a828796296e2d58076e1", + "git_sha": "d0b4fc03af52a1cc8c6fb4493b921b57352b1dd8", + "installed_by": ["modules"] + }, + "proteus/readproteingroups": { + "branch": "master", + "git_sha": "685765c4a5e3423d20f74aa9c4405ef0b8c4748d", "installed_by": ["modules"] }, "rmarkdownnotebook": { "branch": "master", - "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", + "git_sha": "4e640839b1076da7c2a2a4a8f160815e00eedfba", "installed_by": ["modules"] }, "shinyngs/app": { "branch": "master", - "git_sha": "880d634b0d5aead9447ae29e3e02e0e31ca7ae7f", - "installed_by": ["modules"], - "patch": "modules/nf-core/shinyngs/app/shinyngs-app.diff" + "git_sha": "1b2fa2138849ac09b3d5c1e25ed70650ec1ebd7b", + "installed_by": ["modules"] }, "shinyngs/staticdifferential": { "branch": "master", - "git_sha": "880d634b0d5aead9447ae29e3e02e0e31ca7ae7f", + "git_sha": "1b2fa2138849ac09b3d5c1e25ed70650ec1ebd7b", "installed_by": ["modules"] }, "shinyngs/staticexploratory": { "branch": "master", - "git_sha": "880d634b0d5aead9447ae29e3e02e0e31ca7ae7f", + "git_sha": "1b2fa2138849ac09b3d5c1e25ed70650ec1ebd7b", "installed_by": ["modules"] }, "shinyngs/validatefomcomponents": { "branch": "master", - "git_sha": "880d634b0d5aead9447ae29e3e02e0e31ca7ae7f", + "git_sha": "1b2fa2138849ac09b3d5c1e25ed70650ec1ebd7b", "installed_by": ["modules"] }, "untar": { "branch": "master", - "git_sha": "cc1f997fab6d8fde5dc0e6e2a310814df5b53ce7", + "git_sha": "d0b4fc03af52a1cc8c6fb4493b921b57352b1dd8", + "installed_by": ["modules"] + }, + "zip": { + "branch": "master", + "git_sha": "9ba4f5972ae27474f752ddb20e855f48047aa6c8", "installed_by": ["modules"] } } diff --git a/modules/local/tabular_to_gsea_chip.nf b/modules/local/tabular_to_gsea_chip.nf index a0decc44..c40d59a4 100644 --- a/modules/local/tabular_to_gsea_chip.nf +++ b/modules/local/tabular_to_gsea_chip.nf @@ -5,7 +5,7 @@ process TABULAR_TO_GSEA_CHIP { conda "conda-forge::gawk=5.1.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/gawk:5.1.0' : - 'quay.io/biocontainers/gawk:5.1.0' }" + 'biocontainers/gawk:5.1.0' }" input: path tsv diff --git a/modules/nf-core/affy/justrma/main.nf b/modules/nf-core/affy/justrma/main.nf index 781e98ac..d0da7129 100644 --- a/modules/nf-core/affy/justrma/main.nf +++ b/modules/nf-core/affy/justrma/main.nf @@ -2,10 +2,10 @@ process AFFY_JUSTRMA { tag "$meta.id" label 'process_single' - conda "bioconda::bioconductor-affy=1.76.0" + conda "bioconda::bioconductor-affy=1.78.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/bioconductor-affy:1.76.0--r42hc0cfd56_2': - 'quay.io/biocontainers/bioconductor-affy:1.76.0--r42hc0cfd56_2' }" + 'https://depot.galaxyproject.org/singularity/bioconductor-affy:1.78.0--r43ha9d7317_1': + 'biocontainers/bioconductor-affy:1.78.0--r43ha9d7317_1' }" input: tuple val(meta), path(samplesheet), path(celfiles_dir) diff --git a/modules/nf-core/atlasgeneannotationmanipulation/gtf2featureannotation/main.nf b/modules/nf-core/atlasgeneannotationmanipulation/gtf2featureannotation/main.nf index 9bfd5fc2..42c6755d 100644 --- a/modules/nf-core/atlasgeneannotationmanipulation/gtf2featureannotation/main.nf +++ b/modules/nf-core/atlasgeneannotationmanipulation/gtf2featureannotation/main.nf @@ -5,7 +5,7 @@ process ATLASGENEANNOTATIONMANIPULATION_GTF2FEATUREANNOTATION { conda "bioconda::atlas-gene-annotation-manipulation=1.1.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/atlas-gene-annotation-manipulation%3A1.1.0--hdfd78af_0': - 'quay.io/biocontainers/atlas-gene-annotation-manipulation:1.1.0--hdfd78af_0' }" + 'biocontainers/atlas-gene-annotation-manipulation:1.1.0--hdfd78af_0' }" input: tuple val(meta), path(gtf) diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf index 800a6099..c9d014b1 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/main.nf +++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -2,10 +2,10 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { label 'process_single' // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda "bioconda::multiqc=1.14" + conda "bioconda::multiqc=1.15" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.14--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.15--pyhdfd78af_0' : + 'biocontainers/multiqc:1.15--pyhdfd78af_0' }" input: path versions diff --git a/modules/nf-core/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml index 60b546a0..c32657de 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/meta.yml +++ b/modules/nf-core/custom/dumpsoftwareversions/meta.yml @@ -1,7 +1,9 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json name: custom_dumpsoftwareversions description: Custom module used to dump software versions within the nf-core pipeline template keywords: - custom + - dump - version tools: - custom: diff --git a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py old mode 100644 new mode 100755 diff --git a/modules/nf-core/custom/matrixfilter/main.nf b/modules/nf-core/custom/matrixfilter/main.nf index 38862429..137918d7 100644 --- a/modules/nf-core/custom/matrixfilter/main.nf +++ b/modules/nf-core/custom/matrixfilter/main.nf @@ -4,7 +4,7 @@ process CUSTOM_MATRIXFILTER { conda "conda-forge::r-base=4.2.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/r-base:4.2.1' : - 'quay.io/biocontainers/r-base:4.2.1' }" + 'biocontainers/r-base:4.2.1' }" input: tuple val(meta), path(abundance) @@ -12,6 +12,7 @@ process CUSTOM_MATRIXFILTER { output: tuple val(meta), path("*.filtered.tsv") , emit: filtered + tuple val(meta), path("*.tests.tsv") , emit: tests tuple val(meta), path("R_sessionInfo.log") , emit: session_info path "versions.yml" , emit: versions diff --git a/modules/nf-core/custom/matrixfilter/meta.yml b/modules/nf-core/custom/matrixfilter/meta.yml index 337af6d6..9911efce 100644 --- a/modules/nf-core/custom/matrixfilter/meta.yml +++ b/modules/nf-core/custom/matrixfilter/meta.yml @@ -4,6 +4,8 @@ description: filter a matrix based on a minimum value and numbers of samples keywords: - matrix - filter + - abundance + - na tools: - "matrixfilter": description: "filter a matrix based on a minimum value and numbers of samples" @@ -34,32 +36,44 @@ input: present (see grouping_variable), but also to validate matrix columns. If not provided, all numeric columns are selected. - minimum_abundance: - type: numeric + type: float description: | Minimum abundance value, supplied via task.ext.args as --minimum_abundance default: 1 - minimum_samples: - type: numeric + type: integer description: | Minimum observations that must pass the threshold to retain the row/ feature (e.g. gene). Supplied via task.ext.args as --minimum_samples default: 1 - minimum_proportion: - type: numeric + type: float description: | A minimum proportion of observations that must pass the threshold. Supplied via task.ext.args as --minimum_proportion. Overrides minimum_samples default: 0 - grouping_variable: - type: optional string + type: string description: | Optionally supply a variable from the sample sheet that can be used to define groups and derive a minimum group size upon which to base minimum observation numbers. The rationale being to allow retention of features that might be present in only one group. Supplied via task.ext.args as --grouping_variable + - minimum_proportion_not_na: + type: float + description: | + A minimum proportion of observations that must have a numeric value (not be NA). + Supplied via task.ext.args as --minimum_proportion_not_na + default: 0.5 + - minimum_samples_not_na: + type: integer + description: | + Minimum observations that must have a numeric value (not be NA) to retain + the row/ feature (e.g. gene). Supplied via task.ext.args as + --minimum_samples_not_na. Overrides minimum_proportion_not_na output: - versions: @@ -75,6 +89,10 @@ output: type: file description: Filtered version of input matrix pattern: "*.filtered.tsv" + - tests: + type: file + description: Boolean matrix with pass/ fail status for each test on each feature + pattern: "*.tests.tsv" authors: - "@pinin4fjords" diff --git a/modules/nf-core/custom/matrixfilter/templates/matrixfilter.R b/modules/nf-core/custom/matrixfilter/templates/matrixfilter.R index 983e97cc..0bfff8cc 100644 --- a/modules/nf-core/custom/matrixfilter/templates/matrixfilter.R +++ b/modules/nf-core/custom/matrixfilter/templates/matrixfilter.R @@ -78,7 +78,9 @@ opt <- list( minimum_abundance = 1, minimum_samples = 1, minimum_proportion = 0, - grouping_variable = NULL + grouping_variable = NULL, + minimum_proportion_not_na = 0.5, + minimum_samples_not_na = NULL ) opt_types <- lapply(opt, class) @@ -152,11 +154,31 @@ if ((opt\$sample_file != '') && ( ! 
is.null(opt\$grouping_variable))){ opt\$minimum_samples <- ncol(abundance_matrix) * opt\$minimum_proportion } -# Generate a boolean vector specifying the features to retain +# Also set up filtering for NAs; use minimum_proportion_not_na by default; only +# use minimum_samples_not_na if it is provided (default NULL) +# --> the NA test can always use minimum_samples_not_na, as this will contain the +# correct value even if the proportion is to be used -keep <- apply(abundance_matrix, 1, function(x){ - sum(x > opt\$minimum_abundance) >= opt\$minimum_samples -}) +if (is.null(opt\$minimum_samples_not_na)) { + opt\$minimum_samples_not_na <- ncol(abundance_matrix) * opt\$minimum_proportion_not_na +} + +# Define the tests + +tests <- list( + 'abundance' = function(x) sum(x > opt\$minimum_abundance, na.rm = T) >= opt\$minimum_samples, # check if rows have sufficiently high abundance + 'na' = function(x) !any(is.na(x)) || sum(!is.na(x)) >= opt\$minimum_samples_not_na # check if enough values in row are not NA +) + +# Apply the functions row-wise on the abundance_matrix and store the result in a boolean matrix + +boolean_matrix <- t(apply(abundance_matrix, 1, function(row) { + sapply(tests, function(f) f(row)) +})) + +# We will retain features passing all tests + +keep <- apply(boolean_matrix, 1, all) # Write out the matrix retaining the specified rows and re-prepending the # column with the feature identifiers @@ -175,6 +197,20 @@ write.table( quote = FALSE ) +# Write a boolean matrix specifying the status of each test for each feature + +write.table( + data.frame(rownames(abundance_matrix), boolean_matrix), + file = paste0( + prefix, + '.tests.tsv' + ), + col.names = c(feature_id_name, names(tests)), + row.names = FALSE, + sep = '\t', + quote = FALSE +) + ################################################ ################################################ ## R SESSION INFO ## diff --git a/modules/nf-core/custom/tabulartogseacls/main.nf b/modules/nf-core/custom/tabulartogseacls/main.nf index f5f9dbcf..0df0eb1d 100644 --- a/modules/nf-core/custom/tabulartogseacls/main.nf +++ b/modules/nf-core/custom/tabulartogseacls/main.nf @@ -5,7 +5,7 @@ process CUSTOM_TABULARTOGSEACLS { conda "conda-forge::coreutils=9.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'ubuntu:20.04' }" + 'nf-core/ubuntu:20.04' }" input: tuple val(meta), path(samples) diff --git a/modules/nf-core/custom/tabulartogseagct/main.nf b/modules/nf-core/custom/tabulartogseagct/main.nf index 2aa72ef0..c128fe36 100644 --- a/modules/nf-core/custom/tabulartogseagct/main.nf +++ b/modules/nf-core/custom/tabulartogseagct/main.nf @@ -5,7 +5,7 @@ process CUSTOM_TABULARTOGSEAGCT { conda "conda-forge::coreutils=9.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'ubuntu:20.04' }" + 'nf-core/ubuntu:20.04' }" input: tuple val(meta), path(tabular) diff --git a/modules/nf-core/custom/tabulartogseagct/meta.yml b/modules/nf-core/custom/tabulartogseagct/meta.yml index 5de072e0..14274b60 100644 --- a/modules/nf-core/custom/tabulartogseagct/meta.yml +++ b/modules/nf-core/custom/tabulartogseagct/meta.yml @@ -3,6 +3,7 @@ description: Convert a TSV or CSV with features by row and observations by colum keywords: - gsea - gct + - tabular tools: - tabulartogseagct: description: "Convert a TSV or CSV with features by row and observations by column to a GCT format file as consumed by GSEA" diff --git a/modules/nf-core/deseq2/differential/main.nf b/modules/nf-core/deseq2/differential/main.nf index fe6d53d1..bad536b2 100644 --- a/modules/nf-core/deseq2/differential/main.nf +++ b/modules/nf-core/deseq2/differential/main.nf @@ -5,7 +5,7 @@ process DESEQ2_DIFFERENTIAL { conda "bioconda::bioconductor-deseq2=1.34.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/bioconductor-deseq2:1.34.0--r41hc247a5b_3' : - 'quay.io/biocontainers/bioconductor-deseq2:1.34.0--r41hc247a5b_3' }" + 'biocontainers/bioconductor-deseq2:1.34.0--r41hc247a5b_3' }" input: tuple val(meta), val(contrast_variable), val(reference), val(target) diff --git a/modules/nf-core/deseq2/differential/templates/deseq_de.R b/modules/nf-core/deseq2/differential/templates/deseq_de.R index f1fcd847..0c7ea82d 100755 --- a/modules/nf-core/deseq2/differential/templates/deseq_de.R +++ b/modules/nf-core/deseq2/differential/templates/deseq_de.R @@ -93,6 +93,7 @@ round_dataframe_columns <- function(df, columns = NULL, digits = 8){ # Set defaults and classes opt <- list( + output_prefix = ifelse('$task.ext.prefix' == 'null', '$meta.id', '$task.ext.prefix'), count_file = '$counts', sample_file = '$samplesheet', contrast_variable = '$contrast_variable', @@ -143,7 +144,7 @@ for ( ao in names(args_opt)){ # Check if required parameters have been provided -required_opts <- c('contrast_variable', 'reference_level', 'target_level') +required_opts <- c('contrast_variable', 'reference_level', 'target_level', 'output_prefix') missing <- required_opts[unlist(lapply(opt[required_opts], is.null)) | ! 
required_opts %in% names(opt)] if (length(missing) > 0){ @@ -369,10 +370,6 @@ if (opt\$shrink_lfc){ ################################################ ################################################ -prefix_part_names <- c('contrast_variable', 'reference_level', 'target_level', 'blocking_variables') -prefix_parts <- unlist(lapply(prefix_part_names, function(x) gsub("[^[:alnum:]]", "_", opt[[x]]))) -output_prefix <- paste(prefix_parts[prefix_parts != ''], collapse = '-') - contrast.name <- paste(opt\$target_level, opt\$reference_level, sep = "_vs_") cat("Saving results for ", contrast.name, " ...\n", sep = "") @@ -386,7 +383,7 @@ write.table( round_dataframe_columns(data.frame(comp.results, check.names = FALSE)), check.names = FALSE ), - file = paste(output_prefix, 'deseq2.results.tsv', sep = '.'), + file = paste(opt\$output_prefix, 'deseq2.results.tsv', sep = '.'), col.names = TRUE, row.names = FALSE, sep = '\t', @@ -396,7 +393,7 @@ write.table( # Dispersion plot png( - file = paste(output_prefix, 'deseq2.dispersion.png', sep = '.'), + file = paste(opt\$output_prefix, 'deseq2.dispersion.png', sep = '.'), width = 600, height = 600 ) @@ -405,7 +402,7 @@ dev.off() # R object for other processes to use -saveRDS(dds, file = paste(output_prefix, 'dds.rld.rds', sep = '.')) +saveRDS(dds, file = paste(opt\$output_prefix, 'dds.rld.rds', sep = '.')) # Size factors @@ -417,7 +414,7 @@ sf_df = data.frame( colnames(sf_df) <- c('sample', 'sizeFactor') write.table( sf_df, - file = paste(output_prefix, 'deseq2.sizefactors.tsv', sep = '.'), + file = paste(opt\$output_prefix, 'deseq2.sizefactors.tsv', sep = '.'), col.names = TRUE, row.names = FALSE, sep = '\t', @@ -432,7 +429,7 @@ write.table( counts(dds, normalized = TRUE), check.names = FALSE ), - file = paste(output_prefix, 'normalised_counts.tsv', sep = '.'), + file = paste(opt\$output_prefix, 'normalised_counts.tsv', sep = '.'), col.names = TRUE, row.names = FALSE, sep = '\t', @@ -458,7 +455,7 @@ for (vs_method_name in strsplit(opt\$vs_method, ',')){ ), check.names = FALSE ), - file = paste(output_prefix, vs_method_name,'tsv', sep = '.'), + file = paste(opt\$output_prefix, vs_method_name,'tsv', sep = '.'), col.names = TRUE, row.names = FALSE, sep = '\t', @@ -472,7 +469,7 @@ for (vs_method_name in strsplit(opt\$vs_method, ',')){ ################################################ ################################################ -sink(paste(output_prefix, "R_sessionInfo.log", sep = '.')) +sink(paste(opt\$output_prefix, "R_sessionInfo.log", sep = '.')) print(sessionInfo()) sink() diff --git a/modules/nf-core/geoquery/getgeo/main.nf b/modules/nf-core/geoquery/getgeo/main.nf new file mode 100644 index 00000000..39d12c26 --- /dev/null +++ b/modules/nf-core/geoquery/getgeo/main.nf @@ -0,0 +1,24 @@ +process GEOQUERY_GETGEO { + tag "$meta.id" + label 'process_single' + + conda "bioconda::bioconductor-geoquery=2.66.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/bioconductor-geoquery:2.66.0--r42hdfd78af_0' : + 'biocontainers/bioconductor-geoquery:2.66.0--r42hdfd78af_0' }" + + input: + tuple val(meta), val(querygse) + + output: + tuple val(meta), path("*.rds") , emit: rds + tuple val(meta), path("*matrix.tsv") , emit: expression + tuple val(meta), path("*annotation.tsv") , emit: annotation + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + template 'getgeo.R' +} diff --git a/modules/nf-core/geoquery/getgeo/meta.yml b/modules/nf-core/geoquery/getgeo/meta.yml new file mode 100644 index 00000000..bbacbe04 --- /dev/null +++ b/modules/nf-core/geoquery/getgeo/meta.yml @@ -0,0 +1,47 @@ +name: "geoquery_getgeo" +description: Retrieves GEO data from the Gene Expression Omnibus (GEO) +keywords: + - geo + - expression + - microarray + +tools: + - "geoquery": + description: "Get data from NCBI Gene Expression Omnibus (GEO)" + homepage: "https://bioconductor.org/packages/release/bioc/html/GEOquery.html" + documentation: "https://bioconductor.org/packages/release/bioc/vignettes/GEOquery/inst/doc/GEOquery.html" + tool_dev_url: "https://github.com/seandavi/GEOquery" + doi: "10.1093/bioinformatics/btm254" + licence: "MIT" + +input: + - meta: + type: map + description: | + Groovy Map containing metadata about the GEO dataset, minimally 'id'. + - querygse: + type: string + description: | + GSE identifier to pass to getGEO() + +output: + - rds: + type: file + description: R object containing GEO data + pattern: "*.rds" + - expression: + type: file + description: TSV-format expression matrix + pattern: "*matrix.tsv" + - annotation: + type: file + description: TSV-format annotation file + pattern: "*annotation.tsv" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@azedinez" + - "@pinin4fjords" diff --git a/modules/nf-core/geoquery/getgeo/templates/getgeo.R b/modules/nf-core/geoquery/getgeo/templates/getgeo.R new file mode 100644 index 00000000..99d73e40 --- /dev/null +++ b/modules/nf-core/geoquery/getgeo/templates/getgeo.R @@ -0,0 +1,171 @@ +#!/usr/bin/env Rscript + +################################################ +################################################ +## Functions ## +################################################ +################################################ + +#' Parse out options from a string without recourse to optparse +#' +#' @param x Long-form argument list like --opt1 val1 --opt2 val2 +#' +#' @return named list of options and values similar to optparse + +parse_args <- function(x){ + args_list <- unlist(strsplit(x, ' ?--')[[1]])[-1] + args_vals <- lapply(args_list, function(x) scan(text=x, what='character', quiet = TRUE)) + + # Ensure the option vectors are length 2 (key/ value) to catch empty ones + args_vals <- lapply(args_vals, function(z){ length(z) <- 2; z}) + + parsed_args <- structure(lapply(args_vals, function(x) x[2]), names = lapply(args_vals, function(x) x[1])) + parsed_args[! is.na(parsed_args)] +} + +#' Round numeric dataframe columns to fixed decimal places by applying +#' formatting and converting back to numerics +#' +#' @param dataframe A data frame +#' @param columns Which columns to round (assumes all of them by default) +#' @param digits How many decimal places to round to? 
+#' +#' @return output Data frame + +round_dataframe_columns <- function(df, columns = NULL, digits = 8){ + if (is.null(columns)){ + columns <- colnames(df) + } + + df[,columns] <- format( + data.frame(df[, columns], check.names = FALSE), + nsmall = digits + ) + + # Convert columns back to numeric + + for (c in columns) { + df[[c]][grep("^ *NA\$", df[[c]])] <- NA + df[[c]] <- as.numeric(df[[c]]) + } + df +} + +################################################ +################################################ +## PARSE PARAMETERS FROM NEXTFLOW ## +################################################ +################################################ + +opt <- list( + querygse = '$querygse', + metacols = NULL +) +args_opt <- parse_args('$task.ext.args') +for ( ao in names(args_opt)){ + if (! ao %in% names(opt)){ + stop(paste("Invalid option:", ao)) + }else{ + opt[[ao]] <- args_opt[[ao]] + } +} + +################################################ +################################################ +## Finish loading libraries ## +################################################ +################################################ + +library(GEOquery) + +################################################ +################################################ +## Do the GEO query retrieval ## +################################################ +################################################ + +# Fetch data for GSE number + +eset <- getGEO( + GEO = opt\$querygse, + destdir = getwd() +)[[1]] + +# Write probeset annotation. If supplied, parse metadata columns from Nextflow +# parameters to subset the feature metadata file + +probeset_annotation = fData(eset) +if (! is.null(opt\$metacols)){ + feature_cols = strsplit(opt\$metacols,',')[[1]] + probeset_annotation <- probeset_annotation[,feature_cols] +} + +################################################ +################################################ +## Generate outputs ## +################################################ +################################################ + +output_prefix <- ifelse('$task.ext.prefix' == 'null', '', '$task.ext.prefix') + +write.table( + probeset_annotation, + paste0(output_prefix,'annotation.tsv'), + col.names=TRUE, + row.names=FALSE, + sep="\t", + quote=FALSE +) + +# If data is not log scale, transform it as needed for limma downstream + +if(max(exprs(eset),na.rm=T) > 20) { # a bit dirty, needs proper solution later...
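+    # Assumed rationale for the threshold (editorial comment): log2-scale
+    # expression values rarely exceed ~20, whereas un-logged microarray
+    # intensities typically run into the thousands, so a maximum above 20
+    # suggests the matrix has not yet been log-transformed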
+ exprs(eset) <- log2(exprs(eset) + 1) +} + +saveRDS(eset, file = paste0(output_prefix, 'eset.rds')) + +# Write intensity matrix (normalised) + +write.table( + data.frame( + probe_id = rownames(eset), + round_dataframe_columns(as.data.frame(exprs(eset))), + check.names = FALSE + ), + file = paste0(output_prefix, 'matrix.tsv'), + col.names = TRUE, row.names = FALSE, + sep = '\t', quote = FALSE +) + +################################################ +################################################ +## R SESSION INFO ## +################################################ +################################################ + +sink(paste(output_prefix, "R_sessionInfo.log", sep = '.')) +print(sessionInfo()) +sink() + +################################################ +################################################ +## VERSIONS FILE ## +################################################ +################################################ + +r.version <- strsplit(version[['version.string']], ' ')[[1]][3] +geoquery.version <- as.character(packageVersion("GEOquery")) + +writeLines( + c( + '"${task.process}":', + paste(' r-base:', r.version), + paste(' bioconductor-geoquery:', geoquery.version) + ), + 'versions.yml') + +################################################ +################################################ +################################################ +################################################ diff --git a/modules/nf-core/gsea/gsea/main.nf b/modules/nf-core/gsea/gsea/main.nf index 4b7a6430..479af5db 100644 --- a/modules/nf-core/gsea/gsea/main.nf +++ b/modules/nf-core/gsea/gsea/main.nf @@ -5,7 +5,7 @@ process GSEA_GSEA { conda "bioconda::gsea=4.3.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/gsea:4.3.2--hdfd78af_0': - 'quay.io/biocontainers/gsea:4.3.2--hdfd78af_0' }" + 'biocontainers/gsea:4.3.2--hdfd78af_0' }" input: tuple val(meta), path(gct), path(cls), path(gene_sets) diff --git a/modules/nf-core/gunzip/main.nf b/modules/nf-core/gunzip/main.nf index d906034c..73bf08cd 100644 --- a/modules/nf-core/gunzip/main.nf +++ b/modules/nf-core/gunzip/main.nf @@ -5,7 +5,7 @@ process GUNZIP { conda "conda-forge::sed=4.7" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'ubuntu:20.04' }" + 'nf-core/ubuntu:20.04' }" input: tuple val(meta), path(archive) @@ -21,10 +21,14 @@ process GUNZIP { def args = task.ext.args ?: '' gunzip = archive.toString() - '.gz' """ - gunzip \\ - -f \\ + # Not calling gunzip itself because it creates files + # with the original group ownership rather than the + # default one for that user / the work directory + gzip \\ + -cd \\ $args \\ - $archive + $archive \\ + > $gunzip cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/gunzip/meta.yml b/modules/nf-core/gunzip/meta.yml index 2e0e4054..4cdcdf4c 100644 --- a/modules/nf-core/gunzip/meta.yml +++ b/modules/nf-core/gunzip/meta.yml @@ -3,6 +3,7 @@ description: Compresses and decompresses files. 
keywords: - gunzip - compression + - decompression tools: - gunzip: description: | diff --git a/modules/nf-core/limma/differential/main.nf b/modules/nf-core/limma/differential/main.nf index 1f3f47f8..364c5a13 100644 --- a/modules/nf-core/limma/differential/main.nf +++ b/modules/nf-core/limma/differential/main.nf @@ -5,7 +5,7 @@ process LIMMA_DIFFERENTIAL { conda "bioconda::bioconductor-limma=3.54.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/bioconductor-limma:3.54.0--r42hc0cfd56_0' : - 'quay.io/biocontainers/bioconductor-limma:3.54.0--r42hc0cfd56_0' }" + 'biocontainers/bioconductor-limma:3.54.0--r42hc0cfd56_0' }" input: tuple val(meta), val(contrast_variable), val(reference), val(target) diff --git a/modules/nf-core/limma/differential/templates/limma_de.R b/modules/nf-core/limma/differential/templates/limma_de.R index 47d0424f..5a80eb22 100755 --- a/modules/nf-core/limma/differential/templates/limma_de.R +++ b/modules/nf-core/limma/differential/templates/limma_de.R @@ -65,6 +65,7 @@ read_delim_flexible <- function(file, header = TRUE, row.names = NULL, check.nam # Set defaults and classes opt <- list( + output_prefix = ifelse('$task.ext.prefix' == 'null', '$meta.id', '$task.ext.prefix'), count_file = '$intensities', sample_file = '$samplesheet', contrast_variable = '$contrast_variable', @@ -111,7 +112,7 @@ for ( ao in names(args_opt)){ # Check if required parameters have been provided -required_opts <- c('contrast_variable', 'reference_level', 'target_level') +required_opts <- c('contrast_variable', 'reference_level', 'target_level', 'output_prefix') missing <- required_opts[unlist(lapply(opt[required_opts], is.null)) | ! required_opts %in% names(opt)] if (length(missing) > 0){ @@ -340,10 +341,6 @@ comp.results <- do.call(topTable, toptable_args)[rownames(intensities.table),] ################################################ ################################################ -prefix_part_names <- c('contrast_variable', 'reference_level', 'target_level', 'blocking_variables') -prefix_parts <- unlist(lapply(prefix_part_names, function(x) gsub("[^[:alnum:]]", "_", opt[[x]]))) -output_prefix <- paste(prefix_parts[prefix_parts != ''], collapse = '-') - contrast.name <- paste(opt\$target_level, opt\$reference_level, sep = "_vs_") cat("Saving results for ", contrast.name, " ...\n", sep = "") @@ -356,7 +353,7 @@ write.table( probe_id = rownames(comp.results), comp.results ), - file = paste(output_prefix, 'limma.results.tsv', sep = '.'), + file = paste(opt\$output_prefix, 'limma.results.tsv', sep = '.'), col.names = TRUE, row.names = FALSE, sep = '\t', @@ -366,7 +363,7 @@ write.table( # Dispersion plot png( - file = paste(output_prefix, 'limma.mean_difference.png', sep = '.'), + file = paste(opt\$output_prefix, 'limma.mean_difference.png', sep = '.'), width = 600, height = 600 ) @@ -375,7 +372,7 @@ dev.off() # R object for other processes to use -saveRDS(fit2, file = paste(output_prefix, 'MArrayLM.limma.rds', sep = '.')) +saveRDS(fit2, file = paste(opt\$output_prefix, 'MArrayLM.limma.rds', sep = '.')) ################################################ ################################################ @@ -383,7 +380,7 @@ saveRDS(fit2, file = paste(output_prefix, 'MArrayLM.limma.rds', sep = '.')) ################################################ ################################################ -sink(paste(output_prefix, "R_sessionInfo.log", sep = '.')) +sink(paste(opt\$output_prefix, 
"R_sessionInfo.log", sep = '.')) print(sessionInfo()) sink() diff --git a/modules/nf-core/proteus/readproteingroups/main.nf b/modules/nf-core/proteus/readproteingroups/main.nf new file mode 100644 index 00000000..37126cfe --- /dev/null +++ b/modules/nf-core/proteus/readproteingroups/main.nf @@ -0,0 +1,30 @@ +process PROTEUS_READPROTEINGROUPS { + tag "$meta.id" + label 'process_single' + + conda "conda-forge::r-base=4.2.1 bioconda::r-proteus-bartongroup=0.2.16 conda-forge::r-plotly=4.10.2 bioconda::bioconductor-limma=3.54.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-4e01206f2c47f56077f04e5d2d7b312f50513a1e:92abccefbeb09795ad6a93553b62a6ad3daaea48-0': + 'biocontainers/mulled-v2-4e01206f2c47f56077f04e5d2d7b312f50513a1e:92abccefbeb09795ad6a93553b62a6ad3daaea48-0' }" + + input: + tuple val(meta), path(samplesheet), path(intensities) + + output: + tuple val(meta), path("*dendrogram.png") , emit: dendro_plot + tuple val(meta), path("*mean_variance_relationship.png") , emit: mean_var_plot + tuple val(meta), path("*raw_distributions.png") , emit: raw_dist_plot + tuple val(meta), path("*normalized_distributions.png") , emit: norm_dist_plot + tuple val(meta), path("*raw_proteingroups.rds") , emit: raw_rdata + tuple val(meta), path("*normalized_proteingroups.rds") , emit: norm_rdata + tuple val(meta), path("*raw_proteingroups_tab.tsv") , emit: raw_tab + tuple val(meta), path("*normalized_proteingroups_tab.tsv") , emit: norm_tab + tuple val(meta), path("*R_sessionInfo.log") , emit: session_info + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + template 'proteus_readproteingroups.R' +} diff --git a/modules/nf-core/proteus/readproteingroups/meta.yml b/modules/nf-core/proteus/readproteingroups/meta.yml new file mode 100644 index 00000000..bed3dc68 --- /dev/null +++ b/modules/nf-core/proteus/readproteingroups/meta.yml @@ -0,0 +1,81 @@ +name: "proteus_readproteingroups" +description: reads a maxQuant proteinGroups file with Proteus +keywords: + - proteomics + - proteus + - readproteingroups +tools: + - "proteus": + description: "R package for analysing proteomics data" + homepage: "https://github.com/bartongroup/Proteus" + documentation: "https://rdrr.io/github/bartongroup/Proteus/" + tool_dev_url: "https://github.com/bartongroup/Proteus" + doi: "10.1101/416511" + licence: "['GPL v2']" + +input: + - meta: + type: map + description: | + Groovy Map containing contrast information, e.g. [ variable:'treatment', reference:'treated', control:'saline', blocking:'' ] + - samplesheet: + type: file + description: | + CSV or TSV format sample sheet with sample metadata; check here for specifications: https://rdrr.io/github/bartongroup/Proteus/man/readProteinGroups.html + - intensities: + type: file + description: | + proteinGroups TXT file with protein intensities information from maxQuant; check here for specifications: https://rdrr.io/github/bartongroup/Proteus/man/readProteinGroups.html + - meta2: + type: map + description: | + Groovy Map containing contrast information, e.g. 
[ variable:'treatment', reference:'treated', control:'saline', blocking:'' ] + - contrast_variable: + type: string + description: | + The column in the sample sheet that should be used to define groups for comparison + +output: + - dendro_plot: + type: file + description: | + PNG file; dendrogram of the normalized samples hierarchically clustered by their intensities + - mean_var_plot: + type: file + description: | + PNG file; plot of the log-intensity variance vs log-intensity mean of each condition in the normalized samples + - raw_dist_plot: + type: file + description: | + PNG file; plot of the intensity/ratio distributions of the raw samples + - norm_dist_plot: + type: file + description: | + PNG file; plot of the intensity/ratio distributions of the normalized samples + - raw_rdata: + type: file + description: | + RDS file of a proteinGroups object from Proteus, contains raw protein intensities and additional info + - norm_rdata: + type: file + description: | + RDS file of a proteinGroups object from Proteus, contains normalized protein intensities and additional info + - raw_tab: + type: file + description: | + TSV-format intensities table from Proteus, contains raw protein intensities + - norm_tab: + type: file + description: | + TSV-format intensities table from Proteus, contains normalized protein intensities + - session_info: + type: file + description: | + LOG file of the R sessionInfo from the module run + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@WackerO" diff --git a/modules/nf-core/proteus/readproteingroups/templates/proteus_readproteingroups.R b/modules/nf-core/proteus/readproteingroups/templates/proteus_readproteingroups.R new file mode 100644 index 00000000..5806971d --- /dev/null +++ b/modules/nf-core/proteus/readproteingroups/templates/proteus_readproteingroups.R @@ -0,0 +1,382 @@ +#!/usr/bin/env Rscript + +# Written by Oskar Wacker (https://github.com/WackerO) in +# collaboration with Stefan Czemmel (https://github.com/qbicStefanC) +# Script template by Jonathan Manning (https://github.com/pinin4fjords) + +# MIT License + +# Copyright (c) QBiC + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
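+
+# Illustrative note on how options reach this template (the option values in
+# this example are hypothetical): the module forwards task.ext.args verbatim,
+# so an ext.args string such as
+#
+#     --norm_function normalizeQuantiles --round_digits 4
+#
+# is parsed by parse_args() below into a named list of character values, which
+# are then coerced back to the classes of the matching defaults in `opt`.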
+ +################################################ +################################################ +## Functions ## +################################################ +################################################ + +#' Parse out options from a string without recourse to optparse +#' +#' @param x Long-form argument list like --opt1 val1 --opt2 val2 +#' +#' @return named list of options and values similar to optparse + +parse_args <- function(x) { + args_list <- unlist(strsplit(x, ' ?--')[[1]])[-1] + args_vals <- lapply(args_list, function(x) scan(text=x, what='character', quiet = TRUE)) + + # Ensure the option vectors are length 2 (key/ value) to catch empty ones + args_vals <- lapply(args_vals, function(z) { length(z) <- 2; z}) + + parsed_args <- structure(lapply(args_vals, function(x) x[2]), names = lapply(args_vals, function(x) x[1])) + parsed_args[! is.na(parsed_args)] +} + +#' Flexibly read CSV or TSV files +#' +#' @param file Input file +#' @param header Passed to read.delim() +#' @param row.names Passed to read.delim() +#' +#' @return output Data frame + +read_delim_flexible <- function(file, header = TRUE, row.names = NULL, check.names = F) { + + ext <- tolower(tail(strsplit(basename(file), split = "\\\\.")[[1]], 1)) + + if (ext == "tsv" || ext == "txt") { + separator <- "\\t" + } else if (ext == "csv") { + separator <- "," + } else { + stop(paste("Unknown separator for", ext)) + } + + read.delim( + file, + sep = separator, + header = header, + row.names = row.names, + check.names = check.names + ) +} + +#' Round numeric dataframe columns to fixed decimal places by applying +#' formatting and converting back to numerics +#' +#' @param dataframe A data frame +#' @param columns Which columns to round (assumes all of them by default) +#' @param digits How many decimal places to round to? If -1, will return the unchanged input df +#' +#' @return output Data frame +round_dataframe_columns <- function(df, columns = NULL, digits = -1) { + if (digits == -1) { + return(df) # if -1, return df without rounding + } + + df <- data.frame(df, check.names = FALSE) # make data.frame from vector as otherwise, the format will get messed up + if (is.null(columns)) { + columns <- colnames(df) + } + df[,columns] <- round( + data.frame(df[, columns], check.names = FALSE), + digits = digits + ) + + # Convert columns back to numeric + + for (c in columns) { + df[[c]][grep("^ *NA\$", df[[c]])] <- NA + df[[c]] <- as.numeric(df[[c]]) + } + df +} + +################################################ +################################################ +## PARSE PARAMETERS FROM NEXTFLOW ## +################################################ +################################################ + +# I've defined these in a single array like this so that we could go back to an +# optparse-driven method in future with module bin/ directories, rather than +# the template + +# Set defaults and classes + +opt <- list( + intensities_file = '$intensities', + sample_file = '$samplesheet', + contrast_variable = NULL, + protein_id_col = 'Majority protein IDs', + sample_id_col = 'sample', + measure_col_prefix = 'intensities', + norm_function = 'normalizeMedian', + plotsd_method = 'violin', + plotmv_loess = T, + palette_name = 'Set1', + round_digits = -1 +) +opt_types <- lapply(opt, class) + +# Apply parameter overrides + +args_opt <- parse_args('$task.ext.args') +for ( ao in names(args_opt)) { + if (! ao %in% names(opt)) { + stop(paste("Invalid option:", ao)) + } else { + + # Preserve classes from defaults where possible + if (! 
is.null(opt[[ao]])) {
+            args_opt[[ao]] <- as(args_opt[[ao]], opt_types[[ao]])
+        }
+        opt[[ao]] <- args_opt[[ao]]
+    }
+}
+
+# Check if required parameters have been provided
+
+required_opts <- c('contrast_variable')
+missing <- required_opts[unlist(lapply(opt[required_opts], is.null)) | ! required_opts %in% names(opt)]
+
+if (length(missing) > 0) {
+    stop(paste("Missing required options:", paste(missing, collapse=', ')))
+}
+
+# Check file inputs are valid
+
+for (file_input in c('intensities_file', 'sample_file')) {
+    if (is.null(opt[[file_input]])) {
+        stop(paste("Please provide", file_input), call. = FALSE)
+    }
+
+    if (! file.exists(opt[[file_input]])) {
+        stop(paste0('Value of ', file_input, ': ', opt[[file_input]], ' is not a valid file'))
+    }
+}
+
+################################################
+################################################
+##          Finish loading libraries          ##
+################################################
+################################################
+
+library(limma)
+library(plotly)
+library(proteus)
+
+################################################
+################################################
+# READ IN INTENSITIES FILE AND SAMPLE METADATA #
+################################################
+################################################
+
+intensities.table <-
+    read_delim_flexible(
+        file = opt\$intensities_file,
+        check.names = FALSE
+    )
+
+sample.sheet <-
+    read_delim_flexible(
+        file = opt\$sample_file,
+        check.names=FALSE
+    )
+
+if (! opt\$protein_id_col %in% colnames(intensities.table)) {
+    stop(paste0("Specified protein ID column '", opt\$protein_id_col, "' is not in the intensities table"))
+}
+
+if (! opt\$sample_id_col %in% colnames(sample.sheet)) {
+    stop(paste0("Specified sample ID column '", opt\$sample_id_col, "' is not in the sample sheet"))
+}
+
+# Add metadata columns that are necessary for proteus
+
+sample.sheet\$sample <- sample.sheet[[opt\$sample_id_col]]
+sample.sheet\$condition <- sample.sheet[[opt\$contrast_variable]]
+
+# Add prefix for proteinGroups measurement columns to the sample IDs from the samplesheet
+
+measure.cols <- setNames(paste0(opt\$measure_col_prefix, sample.sheet[[opt\$sample_id_col]]), sample.sheet[[opt\$sample_id_col]])
+
+# Check that all samples specified in the input sheet are present in the intensities table
+
+missing_columns <- paste0(opt\$measure_col_prefix, sample.sheet[[opt\$sample_id_col]])
+missing_columns <- missing_columns[!missing_columns %in% colnames(intensities.table)]
+if (length(missing_columns) > 0) {
+    stop(paste(
+        length(missing_columns),
+        'specified samples do not have a(n)',
+        opt\$measure_col_prefix,
+        'column in intensities table. 
The following columns are missing:', + paste(missing_columns, collapse = ', ') + )) +} + +################################################ +################################################ +## Run Proteus processes and generate outputs ## +################################################ +################################################ + +# Replace proteus default ID column with user param and re-set the names of the resulting object (gsub sets the names to NULL) + +proteinColumns <- setNames(gsub("Majority protein IDs", opt\$protein_id_col, proteus::proteinColumns), names(proteus::proteinColumns)) +proteinGroups <- readProteinGroups( + file=opt\$intensities_file, + meta=sample.sheet, + measure.cols=measure.cols, + data.cols=proteinColumns +) + +# Define valid normalization functions + +valid_norm_functions <- list("normalizeMedian", "normalizeQuantiles") + +# Generate plots for requested normalization; also, save normalized protein groups for limma + +if (! (opt\$norm_function %in% valid_norm_functions)) { + stop(paste0("Invalid norm_function argument: ", opt\$norm_function, + ". Valid norm_functions are: ", paste(valid_norm_functions, collapse=", "), ".")) +} + +proteinGroups.normalized <- normalizeData(proteinGroups, norm.fun = eval(parse(text=opt\$norm_function))) # Proteus also accepts other norm.funs, e.g. from limma +proteinGroups.normalized\$tab <- log2(proteinGroups.normalized\$tab) + +png(paste(opt\$norm_function, 'normalized_distributions.png', sep='.'), width = 5*300, height = 5*300, res = 300, pointsize = 8) +print( + plotSampleDistributions(proteinGroups.normalized, title=paste0("Sample distributions after applying\n", opt\$norm_function, " in contrast ", opt\$contrast_variable), fill="condition", method=opt\$plotsd_method) + + scale_fill_brewer(palette=opt\$palette_name, name=opt\$contrast_variable) + + theme(plot.title = element_text(size = 12)) + ) +dev.off() + +png(paste(opt\$norm_function, 'normalized_mean_variance_relationship.png', sep='.'), width = 5*300, height = 5*300, res = 300, pointsize = 8) +print( + plotMV(proteinGroups.normalized, with.loess=opt\$plotmv_loess) + + ggtitle(paste0("Sample mean variance relationship after applying\n", opt\$norm_function, " in contrast ", opt\$contrast_variable)) + + scale_fill_distiller(palette=opt\$palette_name) + + theme(plot.title = element_text(size = 12)) + ) +dev.off() + +png(paste(opt\$norm_function, 'normalized_dendrogram.png', sep='.'), width = 5*300, height = 5*300, res = 300, pointsize = 8) +print( + plotClustering(proteinGroups.normalized) + + ggtitle(paste0("Sample clustering after applying\n", opt\$norm_function, " in contrast ", opt\$contrast_variable)) + + theme(plot.title = element_text(size = 12)) + ) +dev.off() + +# R object for other processes to use + +saveRDS(proteinGroups.normalized, file = paste(opt\$norm_function, 'normalized_proteingroups.rds', sep='.')) + +# Write normalized intensities matrix + +out_df <- data.frame( + round_dataframe_columns(proteinGroups.normalized\$tab, digits=opt\$round_digits), + check.names = FALSE +) +out_df[[opt\$protein_id_col]] <- rownames(proteinGroups.normalized\$tab) # proteus saves the IDs as rownames; save these to a separate column +out_df <- out_df[c(opt\$protein_id_col, colnames(out_df)[colnames(out_df) != opt\$protein_id_col])] # move ID column to first position +write.table( + out_df, + file = paste(opt\$norm_function, 'normalized_proteingroups_tab', 'tsv', sep = '.'), + col.names = TRUE, + row.names = FALSE, + sep = '\t', + quote = FALSE +) + +# Process and save 
raw table + +proteinGroups\$tab <- log2(proteinGroups\$tab) + +# Generate raw distribution plot + +png('raw_distributions.png', width = 5*300, height = 5*300, res = 300, pointsize = 8) +print( + plotSampleDistributions(proteinGroups, title=paste("Raw sample distributions in contrast", opt\$contrast_variable), fill="condition", method=opt\$plotsd_method) + + scale_fill_brewer(palette=opt\$palette_name, name=opt\$contrast_variable) + + theme(plot.title = element_text(size = 12)) + ) +dev.off() + +# R object for other processes to use + +saveRDS(proteinGroups, file = 'raw_proteingroups.rds') + + +# Write raw intensities matrix + +out_df <- data.frame( + round_dataframe_columns(proteinGroups\$tab, digits=opt\$round_digits), + check.names = FALSE + ) +out_df[[opt\$protein_id_col]] <- rownames(proteinGroups\$tab) # proteus saves the IDs as rownames; save these to a separate column +out_df <- out_df[c(opt\$protein_id_col, colnames(out_df)[colnames(out_df) != opt\$protein_id_col])] # move ID column to first position + + +write.table( + out_df, + file = 'raw_proteingroups_tab.tsv', + col.names = TRUE, + row.names = FALSE, + sep = '\t', + quote = FALSE +) + +################################################ +################################################ +## R SESSION INFO ## +################################################ +################################################ + +sink("R_sessionInfo.log") +print(sessionInfo()) +sink() + +################################################ +################################################ +## VERSIONS FILE ## +################################################ +################################################ + +r.version <- strsplit(version[['version.string']], ' ')[[1]][3] +limma.version <- as.character(packageVersion('limma')) +plotly.version <- as.character(packageVersion('plotly')) +proteus.version <- as.character(packageVersion('proteus')) +writeLines( + c( + '"${task.process}":', + paste(' r-base:', r.version), + paste(' r-proteus-bartongroup:', proteus.version), + paste(' r-plotly:', plotly.version), + paste(' bioconductor-limma:', limma.version) + ), +'versions.yml') + +################################################ +################################################ +################################################ +################################################ diff --git a/modules/nf-core/rmarkdownnotebook/main.nf b/modules/nf-core/rmarkdownnotebook/main.nf index b6e55584..3eda8ee9 100644 --- a/modules/nf-core/rmarkdownnotebook/main.nf +++ b/modules/nf-core/rmarkdownnotebook/main.nf @@ -10,7 +10,7 @@ process RMARKDOWNNOTEBOOK { conda "conda-forge::r-base=4.1.0 conda-forge::r-rmarkdown=2.9 conda-forge::r-yaml=2.2.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/mulled-v2-31ad840d814d356e5f98030a4ee308a16db64ec5:0e852a1e4063fdcbe3f254ac2c7469747a60e361-0' : - 'quay.io/biocontainers/mulled-v2-31ad840d814d356e5f98030a4ee308a16db64ec5:0e852a1e4063fdcbe3f254ac2c7469747a60e361-0' }" + 'biocontainers/mulled-v2-31ad840d814d356e5f98030a4ee308a16db64ec5:0e852a1e4063fdcbe3f254ac2c7469747a60e361-0' }" input: tuple val(meta), path(notebook) @@ -18,10 +18,11 @@ process RMARKDOWNNOTEBOOK { path input_files output: - tuple val(meta), path("*.html") , emit: report - tuple val(meta), path ("artifacts/*") , emit: artifacts, optional: true - tuple val(meta), path ("session_info.log"), emit: session_info - path "versions.yml" , emit: versions + tuple val(meta), path("*.html") , emit: report + tuple val(meta), path("*.parameterised.Rmd") , emit: parameterised_notebook, optional: true + tuple val(meta), path ("artifacts/*") , emit: artifacts, optional: true + tuple val(meta), path ("session_info.log") , emit: session_info + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -53,10 +54,53 @@ process RMARKDOWNNOTEBOOK { params_cmd = dump_params_yml(nb_params) render_cmd = """\ params = yaml::read_yaml('.params.yml') - rmarkdown::render('${prefix}.Rmd', params=params, envir=new.env()) + + # Instead of rendering with params, produce a version of the R + # markdown with param definitions set, so the notebook itself can + # be reused + rmd_content <- readLines('${prefix}.Rmd') + + # Extract YAML content between the first two '---' + start_idx <- which(rmd_content == "---")[1] + end_idx <- which(rmd_content == "---")[2] + rmd_yaml_content <- paste(rmd_content[(start_idx+1):(end_idx-1)], collapse = "\\n") + rmd_params <- yaml::yaml.load(rmd_yaml_content) + + # Override the params + rmd_params[['params']] <- modifyList(rmd_params[['params']], params) + + # Recursive function to add 'value' to list elements, except for top-level + add_value_recursively <- function(lst, is_top_level = FALSE) { + if (!is.list(lst)) { + return(lst) + } + + lst <- lapply(lst, add_value_recursively) + if (!is_top_level) { + lst <- list(value = lst) + } + return(lst) + } + + # Reformat nested lists under 'params' to have a 'value' key recursively + rmd_params[['params']] <- add_value_recursively(rmd_params[['params']], is_top_level = TRUE) + + # Convert back to YAML string + updated_yaml_content <- as.character(yaml::as.yaml(rmd_params)) + + # Remove the old YAML content + rmd_content <- rmd_content[-((start_idx+1):(end_idx-1))] + + # Insert the updated YAML content at the right position + rmd_content <- append(rmd_content, values = unlist(strsplit(updated_yaml_content, split = "\\n")), after = start_idx) + + writeLines(rmd_content, '${prefix}.parameterised.Rmd') + + # Render based on the updated file + rmarkdown::render('${prefix}.parameterised.Rmd', output_file='${prefix}.html', envir = new.env()) """ } else { - render_cmd = "rmarkdown::render('${prefix}.Rmd')" + render_cmd = "rmarkdown::render('${prefix}.Rmd', output_file='${prefix}.html')" } """ diff --git a/modules/nf-core/rmarkdownnotebook/meta.yml b/modules/nf-core/rmarkdownnotebook/meta.yml index 08336169..cdd16278 100644 --- a/modules/nf-core/rmarkdownnotebook/meta.yml +++ b/modules/nf-core/rmarkdownnotebook/meta.yml @@ -46,7 +46,7 @@ input: Groovy map with notebook parameters which will be passed to rmarkdown to generate parametrized reports. - input_files: - type: path + type: file description: One or multiple files serving as input data for the notebook. 
pattern: "*" diff --git a/modules/nf-core/shinyngs/app/main.nf b/modules/nf-core/shinyngs/app/main.nf index b5c51234..9cdf5cb1 100644 --- a/modules/nf-core/shinyngs/app/main.nf +++ b/modules/nf-core/shinyngs/app/main.nf @@ -13,10 +13,10 @@ process SHINYNGS_APP { // // Those values must then be set in your Nextflow secrets. - conda "bioconda::r-shinyngs=1.7.1" + conda "bioconda::r-shinyngs=1.8.4" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/r-shinyngs:1.7.1--r42hdfd78af_1': - 'quay.io/biocontainers/r-shinyngs:1.7.1--r42hdfd78af_1' }" + 'https://depot.galaxyproject.org/singularity/r-shinyngs:1.8.4--r43hdfd78af_0' : + 'biocontainers/r-shinyngs:1.8.4--r43hdfd78af_0' }" input: tuple val(meta), path(sample), path(feature_meta), path(assay_files) // Experiment-level info @@ -37,12 +37,9 @@ process SHINYNGS_APP { def prefix = task.ext.prefix ?: meta.id """ - cp $feature_meta fixed_$feature_meta - sed -i.bak s/${params.features_name_col}/gene_name/ fixed_$feature_meta - make_app_from_files.R \\ --sample_metadata $sample \\ - --feature_metadata fixed_$feature_meta \\ + --feature_metadata $feature_meta \\ --assay_files ${assay_files.join(',')} \\ --contrast_file $contrasts \\ --contrast_stats_assay $contrast_stats_assay \\ diff --git a/modules/nf-core/shinyngs/app/meta.yml b/modules/nf-core/shinyngs/app/meta.yml index a695351d..1c5cb831 100644 --- a/modules/nf-core/shinyngs/app/meta.yml +++ b/modules/nf-core/shinyngs/app/meta.yml @@ -34,7 +34,7 @@ input: description: | TSV-format feature (e.g. gene) metadata - assay_files: - type: list + type: file description: | List of TSV-format matrix files representing different measures for the same samples (e.g. raw and normalised). - contrasts: @@ -42,7 +42,7 @@ input: description: | CSV-format file with four columns identifying the sample sheet variable, reference level, treatment level, and optionally a comma-separated list of covariates used as blocking factors. 
- differential_results: - type: list + type: file description: | List of TSV-format differential analysis outputs, one per row of the contrasts file diff --git a/modules/nf-core/shinyngs/app/shinyngs-app.diff b/modules/nf-core/shinyngs/app/shinyngs-app.diff deleted file mode 100644 index bf8a864a..00000000 --- a/modules/nf-core/shinyngs/app/shinyngs-app.diff +++ /dev/null @@ -1,19 +0,0 @@ -Changes in module 'nf-core/shinyngs/app' ---- modules/nf-core/shinyngs/app/main.nf -+++ modules/nf-core/shinyngs/app/main.nf -@@ -37,9 +37,12 @@ - def prefix = task.ext.prefix ?: meta.id - - """ -+ cp $feature_meta fixed_$feature_meta -+ sed -i.bak s/${params.features_name_col}/gene_name/ fixed_$feature_meta -+ - make_app_from_files.R \\ - --sample_metadata $sample \\ -- --feature_metadata $feature_meta \\ -+ --feature_metadata fixed_$feature_meta \\ - --assay_files ${assay_files.join(',')} \\ - --contrast_file $contrasts \\ - --contrast_stats_assay $contrast_stats_assay \\ - -************************************************************ diff --git a/modules/nf-core/shinyngs/staticdifferential/main.nf b/modules/nf-core/shinyngs/staticdifferential/main.nf index bef46399..e25edf5b 100644 --- a/modules/nf-core/shinyngs/staticdifferential/main.nf +++ b/modules/nf-core/shinyngs/staticdifferential/main.nf @@ -2,10 +2,10 @@ process SHINYNGS_STATICDIFFERENTIAL { tag "$meta.id" label 'process_single' - conda "bioconda::r-shinyngs=1.7.1" + conda "bioconda::r-shinyngs=1.8.4" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/r-shinyngs:1.7.1--r42hdfd78af_1': - 'quay.io/biocontainers/r-shinyngs:1.7.1--r42hdfd78af_1' }" + 'https://depot.galaxyproject.org/singularity/r-shinyngs:1.8.4--r43hdfd78af_0' : + 'biocontainers/r-shinyngs:1.8.4--r43hdfd78af_0' }" input: tuple val(meta), path(differential_result) // Differential info: contrast and differential stats diff --git a/modules/nf-core/shinyngs/staticdifferential/meta.yml b/modules/nf-core/shinyngs/staticdifferential/meta.yml index 22ec8a77..84fe31de 100644 --- a/modules/nf-core/shinyngs/staticdifferential/meta.yml +++ b/modules/nf-core/shinyngs/staticdifferential/meta.yml @@ -27,7 +27,7 @@ input: features and samples, at a minimum an id. e.g. [ id:'test' ] - differential_results: - type: list + type: file description: | CSV or TSV-format tabular file with differential analysis outputs - sample: @@ -39,7 +39,7 @@ input: description: | CSV or TSV-format feature (e.g. gene) metadata - assay_file: - type: list + type: file description: | CSV or TSV matrix file to use alongside differential statistics in interpretation. Usually a normalised form. @@ -50,11 +50,16 @@ output: description: | Groovy Map containing contrast information e.g. [ variable:'treatment', reference:'treated', control:'saline', blocking:'' ] - - volcanos: - type: tuple + - volcanos_png: + type: file + description: | + Meta-keyed tuple containing a PNG output for a volcano plot built from + the differential result table. + - volcanos_html: + type: file description: | - Meta-keyed tuple containing a PNG and HTML plot for a volcano plot - built from the differential result table. + Meta-keyed tuple containing an HTML output for a volcano plot built + from the differential result table. 
  - versions:
      type: file
      description: File containing software versions
diff --git a/modules/nf-core/shinyngs/staticexploratory/main.nf b/modules/nf-core/shinyngs/staticexploratory/main.nf
index c1572087..4f5cbc33 100644
--- a/modules/nf-core/shinyngs/staticexploratory/main.nf
+++ b/modules/nf-core/shinyngs/staticexploratory/main.nf
@@ -2,10 +2,10 @@ process SHINYNGS_STATICEXPLORATORY {
     tag "$meta.id"
     label 'process_single'

-    conda "bioconda::r-shinyngs=1.7.1"
+    conda "bioconda::r-shinyngs=1.8.4"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/r-shinyngs:1.7.1--r42hdfd78af_1':
-        'quay.io/biocontainers/r-shinyngs:1.7.1--r42hdfd78af_1' }"
+        'https://depot.galaxyproject.org/singularity/r-shinyngs:1.8.4--r43hdfd78af_0' :
+        'biocontainers/r-shinyngs:1.8.4--r43hdfd78af_0' }"

     input:
     tuple val(meta), path(sample), path(feature_meta), path(assay_files)
@@ -19,7 +19,7 @@ process SHINYNGS_STATICEXPLORATORY {
     tuple val(meta), path("*/html/pca2d.html")           , emit: pca2d_html, optional: true
     tuple val(meta), path("*/png/pca3d.png")             , emit: pca3d_png
     tuple val(meta), path("*/html/pca3d.html")           , emit: pca3d_html, optional: true
-    tuple val(meta), path("*/png/mad_correlation.png")   , emit: mad_png
+    tuple val(meta), path("*/png/mad_correlation.png")   , emit: mad_png, optional: true
     tuple val(meta), path("*/html/mad_correlation.html") , emit: mad_html, optional: true
     tuple val(meta), path("*/png/sample_dendrogram.png") , emit: dendro
     path "versions.yml"                                  , emit: versions
diff --git a/modules/nf-core/shinyngs/staticexploratory/meta.yml b/modules/nf-core/shinyngs/staticexploratory/meta.yml
index 008214f9..6f745da5 100644
--- a/modules/nf-core/shinyngs/staticexploratory/meta.yml
+++ b/modules/nf-core/shinyngs/staticexploratory/meta.yml
@@ -30,40 +30,64 @@ input:
      description: |
        TSV-format feature (e.g. gene) metadata
  - assay_files:
-      type: list
+      type: file
      description: |
        List of TSV-format matrix files representing different measures for the
        same samples (e.g. raw and normalised).
 output:
-  - boxplots:
-      type: tuple
+  - boxplots_png:
+      type: file
+      description: |
+        Meta-keyed tuple containing PNG output for box plots covering input
+        matrices.
+  - boxplots_html:
+      type: file
+      description: |
+        Meta-keyed tuple containing HTML output for box plots covering input
+        matrices.
+  - densities_png:
+      type: file
      description: |
-        Meta-keyed tuple containing a PNG and HTML output for box plots
+        Meta-keyed tuple containing PNG output for density plots
        covering input matrices.
-  - densities:
-      type: tuple
+  - densities_html:
+      type: file
      description: |
-        Meta-keyed tuple containing a PNG and HTML output for density plots
+        Meta-keyed tuple containing HTML output for density plots
        covering input matrices.
-  - pca2d:
-      type: tuple
+  - pca2d_png:
+      type: file
+      description: |
+        Meta-keyed tuple containing a PNG output for 2D PCA plots covering
+        specified input matrix (by default the last one in the input list).
+  - pca2d_html:
+      type: file
      description: |
-        Meta-keyed tuple containing a PNG and HTML plot for 2D PCA plots
+        Meta-keyed tuple containing an HTML output for 2D PCA plots covering
+        specified input matrix (by default the last one in the input list).
+  - pca3d_png:
+      type: file
+      description: |
+        Meta-keyed tuple containing a PNG output for 3D PCA plots covering
+        specified input matrix (by default the last one in the input list).
+  - pca3d_html:
+      type: file
+      description: |
+        Meta-keyed tuple containing an HTML output for 3D PCA plots covering
+        specified input matrix (by default the last one in the input list).
+  - mad_png:
+      type: file
+      description: |
+        Meta-keyed tuple containing a PNG output for MAD correlation plots covering specified input matrix (by default the last one in the input list).
-  - pca3d:
-      type: tuple
+  - mad_dendro:
+      type: file
      description: |
-        Meta-keyed tuple containing a PNG and HTML plot for 3D PCA plots
+        Meta-keyed tuple containing an HTML output for MAD correlation plots covering specified input matrix (by default the last one in the input list).
-  - mad:
-      type: tuple
-      description: |
-        Meta-keyed tuple containing a PNG and HTML plot for MAD correlation
-        plots covering specified input matrix (by default the last one in the
-        input list.
  - dendro:
-      type: tuple
+      type: file
      description: |
        Meta-keyed tuple containing a PNG, for a sample clustering dendrogram
        covering specified input matrix (by default the last one in
diff --git a/modules/nf-core/shinyngs/validatefomcomponents/main.nf b/modules/nf-core/shinyngs/validatefomcomponents/main.nf
index 7a488b2e..0e63c89a 100644
--- a/modules/nf-core/shinyngs/validatefomcomponents/main.nf
+++ b/modules/nf-core/shinyngs/validatefomcomponents/main.nf
@@ -2,10 +2,10 @@ process SHINYNGS_VALIDATEFOMCOMPONENTS {
     tag "$sample"
     label 'process_single'

-    conda "bioconda::r-shinyngs=1.7.1"
+    conda "bioconda::r-shinyngs=1.8.4"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/r-shinyngs:1.7.1--r42hdfd78af_1':
-        'quay.io/biocontainers/r-shinyngs:1.7.1--r42hdfd78af_1' }"
+        'https://depot.galaxyproject.org/singularity/r-shinyngs:1.8.4--r43hdfd78af_0' :
+        'biocontainers/r-shinyngs:1.8.4--r43hdfd78af_0' }"

     input:
     tuple val(meta), path(sample), path(assay_files)
diff --git a/modules/nf-core/shinyngs/validatefomcomponents/meta.yml b/modules/nf-core/shinyngs/validatefomcomponents/meta.yml
index 0cf380da..7ad75968 100644
--- a/modules/nf-core/shinyngs/validatefomcomponents/meta.yml
+++ b/modules/nf-core/shinyngs/validatefomcomponents/meta.yml
@@ -3,6 +3,9 @@ description: validate consistency of feature and sample annotations with matrices
 keywords:
   - expression
+  - features
+  - observations
+  - validation

 tools:
   - "shinyngs":
@@ -42,7 +45,7 @@ input:
      description: |
        TSV-format feature (e.g. gene) metadata
  - assay_files:
-      type: list
+      type: file
      description: |
        List of TSV-format matrix files representing different measures for the
        same samples (e.g. raw and normalised).
  - contrasts:
diff --git a/modules/nf-core/untar/main.nf b/modules/nf-core/untar/main.nf
index 3384847a..61461c39 100644
--- a/modules/nf-core/untar/main.nf
+++ b/modules/nf-core/untar/main.nf
@@ -2,10 +2,10 @@ process UNTAR {
     tag "$archive"
     label 'process_single'

-    conda "conda-forge::sed=4.7 bioconda::grep=3.4 conda-forge::tar=1.34"
+    conda "conda-forge::sed=4.7 conda-forge::grep=3.11 conda-forge::tar=1.34"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'https://depot.galaxyproject.org/singularity/ubuntu:20.04' :
-        'ubuntu:20.04' }"
+        'nf-core/ubuntu:20.04' }"

     input:
     tuple val(meta), path(archive)
diff --git a/modules/nf-core/untar/meta.yml b/modules/nf-core/untar/meta.yml
index ea7a3f38..db241a6e 100644
--- a/modules/nf-core/untar/meta.yml
+++ b/modules/nf-core/untar/meta.yml
@@ -3,6 +3,7 @@ description: Extract files.
 keywords:
   - untar
   - uncompress
+  - extract
 tools:
   - untar:
      description: |
diff --git a/modules/nf-core/zip/main.nf b/modules/nf-core/zip/main.nf
new file mode 100644
index 00000000..e281f2fa
--- /dev/null
+++ b/modules/nf-core/zip/main.nf
@@ -0,0 +1,36 @@
+process ZIP {
+    tag "$prefix"
+    label 'process_single'
+
+    conda "conda-forge::p7zip=16.02"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/p7zip:16.02' :
+        'biocontainers/p7zip:16.02' }"
+
+    input:
+    tuple val(meta), path(files, stageAs: "inputs/*")
+
+    output:
+    tuple val(meta), path("${prefix}.zip"), emit: zipped_archive
+    path "versions.yml"                   , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+
+    prefix = task.ext.prefix ?: ( meta.id ? "${meta.id}" : 'zipped_files')
+    """
+    7z \\
+        a \\
+        -l \\
+        $args \\
+        "${prefix}.zip" ./inputs/*
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        7za: \$(echo \$(7za --help) | sed 's/.*p7zip Version //; s/(.*//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/zip/meta.yml b/modules/nf-core/zip/meta.yml
new file mode 100644
index 00000000..6aaf0aa4
--- /dev/null
+++ b/modules/nf-core/zip/meta.yml
@@ -0,0 +1,43 @@
+name: zip
+description: Compress file lists to produce ZIP archive files
+keywords:
+  - unzip
+  - decompression
+  - zip
+  - archiving
+tools:
+  - unzip:
+      description: p7zip is a quick port of 7z.exe and 7za.exe (command line version of 7zip, see www.7-zip.org) for Unix.
+      homepage: https://sourceforge.net/projects/p7zip/
+      documentation: https://sourceforge.net/projects/p7zip/
+      tool_dev_url: https://sourceforge.net/projects/p7zip
+      licence: ["LGPL-2.1-or-later"]
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - files:
+      type: file
+      description: File or list of files to be zipped
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g.
[ id:'test', single_end:false ] + - zipped_archive: + type: file + description: ZIP file + pattern: "*.zip" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@jfy133" + - "@pinin4fjords" diff --git a/nextflow.config b/nextflow.config index 59098513..3912922d 100644 --- a/nextflow.config +++ b/nextflow.config @@ -15,9 +15,10 @@ params { study_type = 'rnaseq' study_abundance_type = 'counts' contrasts = null + querygse = null matrix = null control_features = null - sizefactors_from_controls = null + sizefactors_from_controls = false // Reporting logo_file = "${projectDir}/docs/images/nf-core-differentialabundance_logo_light.png" @@ -39,12 +40,17 @@ params { features_id_col = 'gene_id' features_name_col = 'gene_name' features_metadata_cols = 'gene_id,gene_name,gene_biotype' + features_log2_assays = null + + // GTF parsing options + features_gtf_feature_type = 'transcript' + features_gtf_table_first_field = 'gene_id' // Affy-specific options affy_cel_files_archive = null affy_file_name_col = 'file' affy_background = true - affy_bgversion = 2 + affy_bgversion = 2 affy_destructive = false affy_cdfname = null affy_rm_mask = false @@ -52,6 +58,14 @@ params { affy_rm_extra = false affy_build_annotation = true + // Proteus-specific options + proteus_measurecol_prefix = 'LFQ intensity ' + proteus_norm_function = 'normalizeMedian' + proteus_plotsd_method = 'violin' + proteus_plotmv_loess = true + proteus_palette_name = 'Set1' + proteus_round_digits = -1 + // Filtering options filtering_min_samples = 1 filtering_min_abundance = 1 @@ -68,9 +82,9 @@ params { exploratory_assay_names = "raw,normalised,variance_stabilised" exploratory_final_assay = "variance_stabilised" exploratory_palette_name = 'Set1' - + // Differential options - differential_file_suffix = ".deseq2.results.tsv" + differential_file_suffix = ".deseq2.results.tsv" differential_feature_id_column = "gene_id" differential_feature_name_column = "gene_name" differential_fc_column = "log2FoldChange" @@ -82,7 +96,7 @@ params { differential_foldchanges_logged = true differential_palette_name = 'Set1' differential_subset_to_contrast_samples = false - + // DESeq2-specific options deseq2_test = "Wald" deseq2_fit_type = "parametric" @@ -122,13 +136,13 @@ params { gsea_nperm = 1000 gsea_permute = 'phenotype' - gsea_scoring_scheme = 'weighted' - gsea_metric = 'Signal2Noise' + gsea_scoring_scheme = 'weighted' + gsea_metric = 'Signal2Noise' gsea_sort = 'real' gsea_order = 'descending' gsea_set_max = 500 gsea_set_min = 15 - + gsea_norm = 'meandiv' gsea_rnd_type = 'no_balance' gsea_make_sets = true @@ -136,18 +150,18 @@ params { gsea_num = 100 gsea_plot_top_x = 20 gsea_rnd_seed = 'timestamp' - gsea_save_rnd_lists = false + gsea_save_rnd_lists = false gsea_zip_report = false - + gsea_gene_sets = null // ShinyNGS shinyngs_build_app = true - shinyngs_guess_unlog_matrices = true + shinyngs_guess_unlog_matrices = true // Note: for shinyapps deployment, in addition to setting these values, // SHINYAPPS_TOKEN and SHINYAPPS_SECRET must be available to the - // environment, probably via Nextflow secrets + // environment, probably via Nextflow secrets shinyngs_deploy_to_shinyapps_io = false shinyngs_shinyapps_account = null shinyngs_shinyapps_app_name = null @@ -159,7 +173,6 @@ params { // Boilerplate options outdir = null - tracedir = "${params.outdir}/pipeline_info" publish_dir_mode = 'copy' email = null email_on_fail = null @@ -168,19 +181,15 @@ params { hook_url = null help = false version = false 
- validate_params = true - show_hidden_params = false - schema_ignore_params = 'genomes' - // Config options + config_profile_name = null + config_profile_description = null custom_config_version = 'master' custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" - config_profile_description = null config_profile_contact = null config_profile_url = null - config_profile_name = null - + // Max resource options // Defaults only, expecting to be overwritten @@ -188,6 +197,13 @@ params { max_cpus = 16 max_time = '240.h' + // Schema validation default options + validationFailUnrecognisedParams = false + validationLenientMode = false + validationSchemaIgnoreParams = 'genomes,igenomes_base' + validationShowHiddenParams = false + validate_params = true + } // Load base.config by default for all pipelines @@ -207,10 +223,12 @@ try { // } catch (Exception e) { // System.err.println("WARNING: Could not load nf-core/config/differentialabundance profiles: ${params.custom_config_base}/pipeline/differentialabundance.config") // } - - profiles { - debug { process.beforeScript = 'echo $HOSTNAME' } + debug { + dumpHashes = true + process.beforeScript = 'echo $HOSTNAME' + cleanup = false + } conda { conda.enabled = true docker.enabled = false @@ -219,6 +237,7 @@ profiles { podman.enabled = false shifter.enabled = false charliecloud.enabled = false + apptainer.enabled = false } mamba { conda.enabled = true @@ -229,14 +248,17 @@ profiles { podman.enabled = false shifter.enabled = false charliecloud.enabled = false + apptainer.enabled = false } docker { docker.enabled = true docker.userEmulation = true + conda.enabled = false singularity.enabled = false podman.enabled = false shifter.enabled = false charliecloud.enabled = false + apptainer.enabled = false } arm { docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' @@ -244,44 +266,78 @@ profiles { singularity { singularity.enabled = true singularity.autoMounts = true + conda.enabled = false docker.enabled = false podman.enabled = false shifter.enabled = false charliecloud.enabled = false + apptainer.enabled = false } podman { podman.enabled = true + conda.enabled = false docker.enabled = false singularity.enabled = false shifter.enabled = false charliecloud.enabled = false + apptainer.enabled = false } shifter { shifter.enabled = true + conda.enabled = false docker.enabled = false singularity.enabled = false podman.enabled = false charliecloud.enabled = false + apptainer.enabled = false } charliecloud { charliecloud.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + apptainer.enabled = false + } + apptainer { + apptainer.enabled = true + apptainer.autoMounts = true + conda.enabled = false docker.enabled = false singularity.enabled = false podman.enabled = false shifter.enabled = false + charliecloud.enabled = false } gitpod { executor.name = 'local' - executor.cpus = 16 - executor.memory = 60.GB + executor.cpus = 4 + executor.memory = 8.GB } test { includeConfig 'conf/test.config' } test_nogtf { includeConfig 'conf/test_nogtf.config' } test_full { includeConfig 'conf/test_full.config' } affy { includeConfig 'conf/affy.config' } + rnaseq { includeConfig 'conf/rnaseq.config' } + soft {includeConfig 'conf/soft.config'} test_affy { includeConfig 'conf/test_affy.config' } + test_maxquant { includeConfig 'conf/test_maxquant.config' } + test_soft {includeConfig 'conf/test_soft.config' } } +// Set default registry 
for Apptainer, Docker, Podman and Singularity independent of -profile +// Will not be used unless Apptainer / Docker / Podman / Singularity are enabled +// Set to your registry if you have a mirror of containers +apptainer.registry = 'quay.io' +docker.registry = 'quay.io' +podman.registry = 'quay.io' +singularity.registry = 'quay.io' + +// Nextflow plugins +plugins { + id 'nf-validation' // Validation of pipeline parameters and creation of an input channel from a sample sheet +} // Load igenomes.config if required if (!params.igenomes_ignore) { @@ -289,8 +345,6 @@ if (!params.igenomes_ignore) { } else { params.genomes = [:] } - - // Export these variables to prevent local Python/R libraries from conflicting with those in the container // The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. // See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. @@ -308,19 +362,19 @@ process.shell = ['/bin/bash', '-euo', 'pipefail'] def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') timeline { enabled = true - file = "${params.tracedir}/execution_timeline_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/execution_timeline_${trace_timestamp}.html" } report { enabled = true - file = "${params.tracedir}/execution_report_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/execution_report_${trace_timestamp}.html" } trace { enabled = true - file = "${params.tracedir}/execution_trace_${trace_timestamp}.txt" + file = "${params.outdir}/pipeline_info/execution_trace_${trace_timestamp}.txt" } dag { enabled = true - file = "${params.tracedir}/pipeline_dag_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/pipeline_dag_${trace_timestamp}.html" } manifest { @@ -329,8 +383,8 @@ manifest { homePage = 'https://github.com/nf-core/differentialabundance' description = 'Differential abundance analysis' mainScript = 'main.nf' - nextflowVersion = '!>=22.10.1' - version = '1.2.0' + nextflowVersion = '!>=23.04.0' + version = '1.3.0' doi = '10.5281/zenodo.7568000' } diff --git a/nextflow_schema.json b/nextflow_schema.json index d489a034..e50f8e88 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -24,12 +24,13 @@ "default": "rnaseq", "description": "A string identifying the technology used to produce the data", "help_text": "Currently 'rnaseq' or 'affy_array' may be specified.", - "enum": ["rnaseq", "affy_array"], + "enum": ["rnaseq", "affy_array", "maxquant", "geo_soft_file"], "fa_icon": "far fa-keyboard" }, "input": { "type": "string", "format": "file-path", + "exists": true, "mimetype": "text/csv", "pattern": "^\\S+\\.(csv|tsv|txt)$", "schema": "assets/schema_input.json", @@ -81,6 +82,13 @@ "description": "Alternative to matrix: a compressed CEL files archive such as often found in GEO", "fa_icon": "fas fa-file-archive", "help_text": "Use this option to provide a raw archive of CEL files from Affymetrix arrays. Will be ignored if a matrix is specified." + }, + "querygse": { + "type": "string", + "default": "None", + "description": "Use SOFT files from GEO by providing the GSE study identifier", + "fa_icon": "fas fa-keyboard", + "help_text": "Use this option to provide a GSE study identifier." } } }, @@ -165,6 +173,23 @@ "description": "This parameter allows you to supply your own feature annotations. 
These can often be automatically derived from the GTF used upstream for RNA-seq, or from the Bioconductor annotation package (for affy arrays). ",
                    "help_text": "This parameter allows you to supply your own feature annotations. These can often be automatically derived from the GTF used upstream for RNA-seq, or from the Bioconductor annotation package (for affy arrays). ",
                    "fa_icon": "fas fa-align-justify"
+                },
+                "features_gtf_feature_type": {
+                    "type": "string",
+                    "default": "transcript",
+                    "description": "Where a GTF file is supplied, which feature type to use",
+                    "fa_icon": "fas fa-keyboard"
+                },
+                "features_gtf_table_first_field": {
+                    "type": "string",
+                    "default": "gene_id",
+                    "description": "Where a GTF file is supplied, which field should go first in the converted output table",
+                    "fa_icon": "fas fa-fast-backward"
+                },
+                "features_log2_assays": {
+                    "type": "string",
+                    "description": "Which assays to compute the log2 of. Not necessary for MaxQuant data as this is controlled by the pipeline.",
+                    "help_text": "Either a comma-separated list of assay positions, e.g. '[1,2,3]', or empty list '[]' to not log any assay. If not set, will guess which assays need to be logged (those with a maximum > 20)."
+                }
            },
            "required": ["features_id_col", "features_name_col", "features_type"],
@@ -230,6 +255,50 @@
            },
            "fa_icon": "fas fa-table"
        },
+        "proteus_input_options": {
+            "title": "Proteus input options",
+            "type": "object",
+            "description": "Options for processing of proteomics MaxQuant tables with the Proteus R package",
+            "default": "",
+            "properties": {
+                "proteus_measurecol_prefix": {
+                    "type": "string",
+                    "default": "LFQ intensity ",
+                    "description": "Prefix of the column names of the MaxQuant proteinGroups table in which the intensity values are saved; the prefix has to be followed by the sample names that are also found in the samplesheet. Default: 'LFQ intensity '; take care to also consider trailing whitespace between prefix and sample names."
+                },
+                "proteus_norm_function": {
+                    "type": "string",
+                    "default": "normalizeMedian",
+                    "description": "Normalization function to use on the MaxQuant intensities.",
+                    "help_text": "'normalizeMedian' or 'normalizeQuantiles'",
+                    "enum": ["normalizeMedian", "normalizeQuantiles"]
+                },
+                "proteus_plotsd_method": {
+                    "type": "string",
+                    "default": "violin",
+                    "description": "Which method to use for plotting sample distributions of the MaxQuant intensities; one of 'violin', 'dist', 'box'.",
+                    "help_text": "'violin', 'dist' or 'box'",
+                    "enum": ["violin", "dist", "box"]
+                },
+                "proteus_plotmv_loess": {
+                    "type": "boolean",
+                    "default": true,
+                    "description": "Should a loess line be added to the plot of mean-variance relationship of the conditions? Default: true."
+                },
+                "proteus_palette_name": {
+                    "type": "string",
+                    "default": "Set1",
+                    "help_text": "Check the content of `RColorBrewer::brewer.pal.info` from an R terminal for valid palette names.",
+                    "description": "Valid R palette name",
+                    "fa_icon": "fas fa-palette"
+                },
+                "proteus_round_digits": {
+                    "type": "number",
+                    "default": -1,
+                    "description": "Number of decimals to round the MaxQuant intensities to; default: -1 (will not round)."
+                }
+            }
+        },
        "filtering": {
            "title": "Filtering",
            "type": "object",
@@ -904,18 +973,11 @@
                    "description": "Genome annotation file in GTF format",
                    "pattern": "^\\S+\\.gtf(\\.gz)?",
                    "format": "file-path",
+                    "exists": true,
                    "mimetype": "text/plain",
                    "help_text": "\"This parameter is *mandatory* if `--genome` is not specified.\"",
                    "fa_icon": "fas fa-book"
                },
-                "igenomes_base": {
-                    "type": "string",
-                    "format": "directory-path",
-                    "description": "Directory / URL base for iGenomes references.",
-                    "default": "s3://ngi-igenomes/igenomes",
-                    "fa_icon": "fas fa-cloud-download-alt",
-                    "hidden": true
-                },
                "igenomes_ignore": {
                    "type": "boolean",
                    "description": "Do not load the iGenomes reference config.",
@@ -1002,7 +1064,7 @@
                    "description": "Maximum amount of time that can be requested for any single job.",
                    "default": "240.h",
                    "fa_icon": "far fa-clock",
-                    "pattern": "^(\\d+\\.?\\s*(s|m|h|day)\\s*)+$",
+                    "pattern": "^(\\d+\\.?\\s*(s|m|h|d|day)\\s*)+$",
                    "hidden": true,
                    "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`"
                }
@@ -1018,7 +1080,8 @@
                "help": {
                    "type": "boolean",
                    "description": "Display help text.",
-                    "fa_icon": "fas fa-question-circle"
+                    "fa_icon": "fas fa-question-circle",
+                    "hidden": true
                },
                "version": {
                    "type": "boolean",
@@ -1044,12 +1107,14 @@
                "plaintext_email": {
                    "type": "boolean",
                    "description": "Send plain-text email instead of HTML.",
-                    "fa_icon": "fas fa-remove-format"
+                    "fa_icon": "fas fa-remove-format",
+                    "hidden": true
                },
                "monochrome_logs": {
                    "type": "boolean",
                    "description": "Do not use coloured log outputs.",
-                    "fa_icon": "fas fa-palette"
+                    "fa_icon": "fas fa-palette",
+                    "hidden": true
                },
                "hook_url": {
                    "type": "string",
@@ -1058,23 +1123,32 @@
                    "help_text": "Incoming hook URL for messaging service. Currently, MS Teams and Slack are supported.",
                    "hidden": true
                },
-                "tracedir": {
-                    "type": "string",
-                    "description": "Directory to keep pipeline Nextflow logs and reports.",
-                    "default": "${params.outdir}/pipeline_info",
-                    "fa_icon": "fas fa-cogs"
-                },
                "validate_params": {
                    "type": "boolean",
                    "description": "Boolean whether to validate parameters against the schema at runtime",
                    "default": true,
                    "fa_icon": "fas fa-check-square"
                },
-                "show_hidden_params": {
+                "validationShowHiddenParams": {
                    "type": "boolean",
                    "fa_icon": "far fa-eye-slash",
                    "description": "Show all params when using `--help`",
+                    "hidden": true,
                    "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters."
+                },
+                "validationFailUnrecognisedParams": {
+                    "type": "boolean",
+                    "fa_icon": "far fa-check-circle",
+                    "description": "Validation of parameters fails when an unrecognised parameter is found.",
+                    "hidden": true,
+                    "help_text": "By default, when an unrecognised parameter is found, it returns a warning."
+                },
+                "validationLenientMode": {
+                    "type": "boolean",
+                    "fa_icon": "far fa-check-circle",
+                    "description": "Validation of parameters in lenient mode.",
+                    "hidden": true,
+                    "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)."
+        }
     }
 }
@@ -1095,6 +1169,9 @@
         {
             "$ref": "#/definitions/affy_input_options"
         },
+        {
+            "$ref": "#/definitions/proteus_input_options"
+        },
         {
             "$ref": "#/definitions/filtering"
         },
diff --git a/tower.yml b/tower.yml
index 098d132c..97da8a17 100644
--- a/tower.yml
+++ b/tower.yml
@@ -1,6 +1,8 @@
 reports:
   "**/report/*.html":
     display: "Final differential abundance report"
+  "**/report/*.zip":
+    display: "Report bundle (markdown + input files)"
   pipeline_info/**:
     display: "Pipeline run info"
   "**/plots/exploratory/**/html/*.html":
diff --git a/workflows/differentialabundance.nf b/workflows/differentialabundance.nf
index f71e45b0..381c8085 100644
--- a/workflows/differentialabundance.nf
+++ b/workflows/differentialabundance.nf
@@ -1,12 +1,18 @@
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    VALIDATE INPUTS
+    PRINT PARAMS SUMMARY
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */

-def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params)
+include { paramsSummaryLog; paramsSummaryMap } from 'plugin/nf-validation'
+
+def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs)
+def citation = '\n' + WorkflowMain.citation(workflow) + '\n'
+def summary_params = paramsSummaryMap(workflow)
+
+// Print parameter summary log to screen
+log.info logo + paramsSummaryLog(workflow) + citation

-// Validate input parameters
 WorkflowDifferentialabundance.initialise(params, log)

 def checkPathParamList = [ params.input ]
@@ -14,36 +20,59 @@ for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true

 // Check mandatory parameters

 def exp_meta = [ "id": params.study_name ]
-if (params.input) { ch_input = Channel.of([ exp_meta, params.input ]) } else { exit 1, 'Input samplesheet not specified!' }
-
+if (params.input) { ch_input = Channel.of([ exp_meta, file(params.input, checkIfExists: true) ]) } else { exit 1, 'Input samplesheet not specified!' }
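+// ch_input carries a single [ meta, samplesheet ] pair, e.g. [ [id: 'mystudy'], samplesheet.csv ] (illustrative values)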
+
 if (params.study_type == 'affy_array'){
-    if (params.affy_cel_files_archive) {
-        ch_celfiles = Channel.of([ exp_meta, file(params.affy_cel_files_archive, checkIfExists: true) ])
+    if (params.affy_cel_files_archive) {
+        ch_celfiles = Channel.of([ exp_meta, file(params.affy_cel_files_archive, checkIfExists: true) ])
     } else {
-        error("CEL files archive not specified!")
+        error("CEL files archive not specified!")
     }
-} else{
+} else if (params.study_type == 'maxquant') {

-    // If this is not an affy array, assume we're reading from a matrix
-
-    if (params.matrix) {
+    // If the user has enabled --gsea_run, throw an error
+    if (params.gsea_run) {
+        error("Cannot run GSEA for maxquant data; please set --gsea_run to false.")
+    }
+    if (!params.matrix) {
+        error("Input matrix not specified!")
+    }
+    matrix_file = file(params.matrix, checkIfExists: true)
+
+    // Make channel for proteus
+    proteus_in = Channel.of([ file(params.input), matrix_file ])
+} else if (params.study_type == 'geo_soft_file'){
+
+    // To pull SOFT files from GEO, a GSE study identifier must be provided
+
+    if (params.querygse && params.features_metadata_cols) {
+        ch_querygse = Channel.of([exp_meta, params.querygse])
+    } else {
+        error("Query GSE or features metadata columns not specified")
+    }
+} else {
+    // If this is not microarray data or MaxQuant output, and this is an RNA-seq dataset,
+    // then assume we're reading from a matrix
+
+    if (params.study_type == "rnaseq" && params.matrix) {
         matrix_file = file(params.matrix, checkIfExists: true)
         ch_in_raw = Channel.of([ exp_meta, matrix_file])
-    } else {
+    } else {
         error("Input matrix not specified!")
     }
+
 }

 // Check optional parameters

-if (params.control_features) { ch_control_features = file(params.control_features, checkIfExists: true) } else { ch_control_features = [[],[]] }
-if (params.gsea_run) {
+if (params.control_features) { ch_control_features = Channel.of([ exp_meta, file(params.control_features, checkIfExists: true)]).first() } else { ch_control_features = [[],[]] }
+if (params.gsea_run) {
     if (params.gsea_gene_sets){
         gene_sets_files = params.gsea_gene_sets.split(",")
         ch_gene_sets = Channel.of(gene_sets_files).map { file(it, checkIfExists: true) }
     } else {
         error("GSEA activated but gene set file not specified!")
     }
-}
+}

 report_file = file(params.report_file, checkIfExists: true)
 logo_file = file(params.logo_file, checkIfExists: true)
@@ -77,20 +106,24 @@ include { TABULAR_TO_GSEA_CHIP } from '../modules/local/tabular_to_gsea_chip'
 include { GUNZIP as GUNZIP_GTF } from '../modules/nf-core/gunzip/main'
 include { UNTAR } from '../modules/nf-core/untar/main.nf'
 include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main'
-include { SHINYNGS_APP } from '../modules/nf-core/shinyngs/app/main'
-include { SHINYNGS_STATICEXPLORATORY as PLOT_EXPLORATORY } from '../modules/nf-core/shinyngs/staticexploratory/main'
-include { SHINYNGS_STATICDIFFERENTIAL as PLOT_DIFFERENTIAL } from '../modules/nf-core/shinyngs/staticdifferential/main'
-include { SHINYNGS_VALIDATEFOMCOMPONENTS as VALIDATOR } from '../modules/nf-core/shinyngs/validatefomcomponents/main'
-include { DESEQ2_DIFFERENTIAL } from '../modules/nf-core/deseq2/differential/main'
-include { LIMMA_DIFFERENTIAL } from '../modules/nf-core/limma/differential/main'
-include { CUSTOM_MATRIXFILTER } from '../modules/nf-core/custom/matrixfilter/main'
-include { ATLASGENEANNOTATIONMANIPULATION_GTF2FEATUREANNOTATION as GTF_TO_TABLE } from '../modules/nf-core/atlasgeneannotationmanipulation/gtf2featureannotation/main'
-include { GSEA_GSEA } from '../modules/nf-core/gsea/gsea/main'
+include { SHINYNGS_APP                                          } from '../modules/nf-core/shinyngs/app/main'
+include { SHINYNGS_STATICEXPLORATORY as PLOT_EXPLORATORY        } from '../modules/nf-core/shinyngs/staticexploratory/main'
+include { SHINYNGS_STATICDIFFERENTIAL as PLOT_DIFFERENTIAL      } from '../modules/nf-core/shinyngs/staticdifferential/main'
+include { SHINYNGS_VALIDATEFOMCOMPONENTS as VALIDATOR           } from '../modules/nf-core/shinyngs/validatefomcomponents/main'
+include { DESEQ2_DIFFERENTIAL as DESEQ2_NORM                    } from '../modules/nf-core/deseq2/differential/main'
+include { DESEQ2_DIFFERENTIAL                                   } from '../modules/nf-core/deseq2/differential/main'
+include { LIMMA_DIFFERENTIAL                                    } from '../modules/nf-core/limma/differential/main'
+include { CUSTOM_MATRIXFILTER                                   } from '../modules/nf-core/custom/matrixfilter/main'
+include { ATLASGENEANNOTATIONMANIPULATION_GTF2FEATUREANNOTATION as GTF_TO_TABLE } from '../modules/nf-core/atlasgeneannotationmanipulation/gtf2featureannotation/main'
+include { GSEA_GSEA                                             } from '../modules/nf-core/gsea/gsea/main'
 include { CUSTOM_TABULARTOGSEAGCT } from '../modules/nf-core/custom/tabulartogseagct/main'
-include { CUSTOM_TABULARTOGSEACLS } from '../modules/nf-core/custom/tabulartogseacls/main'
-include { RMARKDOWNNOTEBOOK } from '../modules/nf-core/rmarkdownnotebook/main'
-include { AFFY_JUSTRMA as AFFY_JUSTRMA_RAW } from '../modules/nf-core/affy/justrma/main'
-include { AFFY_JUSTRMA as AFFY_JUSTRMA_NORM } from '../modules/nf-core/affy/justrma/main'
+include { CUSTOM_TABULARTOGSEACLS                               } from '../modules/nf-core/custom/tabulartogseacls/main'
+include { RMARKDOWNNOTEBOOK                                     } from '../modules/nf-core/rmarkdownnotebook/main'
+include { AFFY_JUSTRMA as AFFY_JUSTRMA_RAW                      } from '../modules/nf-core/affy/justrma/main'
+include { AFFY_JUSTRMA as AFFY_JUSTRMA_NORM                     } from '../modules/nf-core/affy/justrma/main'
+include { PROTEUS_READPROTEINGROUPS as PROTEUS                  } from '../modules/nf-core/proteus/readproteingroups/main'
+include { GEOQUERY_GETGEO                                       } from '../modules/nf-core/geoquery/getgeo/main'
+include { ZIP as MAKE_REPORT_BUNDLE                             } from '../modules/nf-core/zip/main'

 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -105,10 +138,12 @@ workflow DIFFERENTIALABUNDANCE {

     // Set up some basic variables
     ch_versions = Channel.empty()
-
+
+    // Channel for the contrasts file
+    ch_contrasts_file = Channel.from([[exp_meta, file(params.contrasts)]])
+
     // If we have affy array data in the form of CEL files we'll be deriving
-    // matrix and annotation from them
-
+    // matrix and annotation from them
+
     if (params.study_type == 'affy_array'){

         // Uncompress the CEL files archive
@@ -121,42 +156,81 @@ workflow DIFFERENTIALABUNDANCE {

         // Run affy to derive the matrix. Reset the meta so it can be used to
         // define a prefix for different matrix flavours

-        AFFY_JUSTRMA_RAW (
+        AFFY_JUSTRMA_RAW (
             ch_affy_input,
-            [[],[]]
+            [[],[]]
         )
-        AFFY_JUSTRMA_NORM (
+        AFFY_JUSTRMA_NORM (
             ch_affy_input,
-            [[],[]]
+            [[],[]]
         )

         // Fetch affy outputs and reset the meta

-        ch_in_raw = AFFY_JUSTRMA_RAW.out.expression
+        ch_in_raw = AFFY_JUSTRMA_RAW.out.expression
         ch_in_norm = AFFY_JUSTRMA_NORM.out.expression
-
+
         ch_affy_platform_features = AFFY_JUSTRMA_RAW.out.annotation
-    }
+        ch_versions = ch_versions
+            .mix(AFFY_JUSTRMA_RAW.out.versions)
+
+    } else if (params.study_type == 'maxquant'){
+
+        // We'll be running Proteus once per unique contrast variable to generate plots
+        // TODO: there should probably be a separate plotting module in proteus to simplify this
+
+        ch_contrast_variables = ch_contrasts_file
+            .splitCsv ( header:true, sep:(params.contrasts.endsWith('tsv') ? '\t' : ','))
+            .map{ it.tail().first() }
+            .map{
+                tuple('id': it.variable)
+            }
+            .unique() // uniquify to keep each contrast variable only once (it may appear on multiple lines, e.g. with blocking)
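+        // e.g. contrast rows that share the variable 'treatment' collapse to a single
+        // [ id:'treatment' ] tuple here ('treatment' being a hypothetical variable name)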
+
+        // Run proteus to import protein abundances
+        PROTEUS(
+            ch_contrast_variables.combine(proteus_in)
+        )
+
+        // Re-map the proteus output tables to the study ID; the tables are the same
+        // across contrasts, so only one norm table is necessary
+        ch_in_raw = PROTEUS.out.raw_tab
+            .first()
+            .map{ meta, matrix -> tuple(exp_meta, matrix) }
+        ch_in_norm = PROTEUS.out.norm_tab
+            .first()
+            .map{ meta, matrix -> tuple(exp_meta, matrix) }
+
+        ch_versions = ch_versions.mix(PROTEUS.out.versions)
+    } else if(params.study_type == 'geo_soft_file'){
+
+        GEOQUERY_GETGEO(ch_querygse)
+        ch_in_norm = GEOQUERY_GETGEO.out.expression
+        ch_soft_features = GEOQUERY_GETGEO.out.annotation
+
+        ch_versions = ch_versions
+            .mix(GEOQUERY_GETGEO.out.versions)
+    }
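+    // At this point ch_in_raw exists for rnaseq, affy_array and maxquant studies,
+    // while ch_in_norm exists for affy_array, maxquant and geo_soft_file studies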

     //// Fetch or derive a feature annotation table

     // If user has provided a feature annotation table, use that
-
     if (params.features){
-        ch_features = Channel.of([ exp_meta, file(params.features, checkIfExists: true)])
+        ch_features = Channel.of([ exp_meta, file(params.features, checkIfExists: true)])
     } else if (params.study_type == 'affy_array'){
         ch_features = ch_affy_platform_features
+    } else if(params.study_type == 'geo_soft_file') {
+        ch_features = ch_soft_features
     } else if (params.gtf){
         // Get feature annotations from a GTF file, gunzip if necessary
-
+
         file_gtf_in = file(params.gtf)
-        file_gtf = [ [ "id": file_gtf_in.simpleName ], file_gtf_in ]
+        file_gtf = [ [ "id": file_gtf_in.simpleName ], file_gtf_in ]
         if ( params.gtf.endsWith('.gz') ){
             GUNZIP_GTF(file_gtf)
             file_gtf = GUNZIP_GTF.out.gunzip
             ch_versions = ch_versions.mix(GUNZIP_GTF.out.versions)
-        }
+        }

         // Get a features table from the GTF and combine with the matrix and sample
         // annotation (fom = features/ observations/ matrix)
@@ -174,23 +248,30 @@
     } else{
-        // Otherwise we can just use the matrix input
-        matrix_as_anno_filename = "matrix_as_anno.${matrix_file.getExtension()}"
-        matrix_file.copyTo(matrix_as_anno_filename)
-        ch_features = Channel.of([ exp_meta, file(matrix_as_anno_filename)])
+        // Otherwise we can just use the matrix input; save it to the workdir so that it does not
+        // just appear wherever the user runs the pipeline
+        matrix_as_anno_filename = "${workflow.workDir}/matrix_as_anno.${matrix_file.getExtension()}"
+        if (params.study_type == 'maxquant'){
+            ch_features_matrix = ch_in_norm
+        } else {
+            ch_features_matrix = ch_in_raw
+        }
+        ch_features = ch_features_matrix
+            .map{ meta, matrix ->
+                matrix.copyTo(matrix_as_anno_filename)
+                [ meta, file(matrix_as_anno_filename) ]
+            }
     }

-    // Channel for the contrasts file
-
-    ch_contrasts_file = Channel.from([[exp_meta, file(params.contrasts)]])
-
     // Check compatibility of FOM elements and contrasts
-
-    if (params.study_type == 'affy_array'){
+    if (params.study_type == 'affy_array' || params.study_type == 'maxquant'){
         ch_matrices_for_validation = ch_in_raw
             .join(ch_in_norm)
             .map{tuple(it[0], [it[1], it[2]])}
     }
+    else if (params.study_type == 'geo_soft_file'){
+        ch_matrices_for_validation = ch_in_norm
+    }
     else{
         ch_matrices_for_validation = ch_in_raw
     }
@@ -204,23 +285,30 @@

     // For Affy, we've validated multiple input matrices for raw and norm,
     // we'll separate them out again here

-    if (params.study_type == 'affy_array'){
+    if (params.study_type == 'affy_array' || params.study_type == 'maxquant'){
         ch_validated_assays = VALIDATOR.out.assays
             .transpose()
             .branch {
                 raw: it[1].name.contains('raw')
-                normalised: it[1].name.contains('normalised')
+                normalised: it[1].name =~ /normali[sz]ed/
             }
         ch_raw = ch_validated_assays.raw
         ch_norm = ch_validated_assays.normalised
-        ch_matrix_for_differential = ch_norm
-    } else{
-        ch_raw = VALIDATOR.out.assays
+    }
+    else if (params.study_type == 'geo_soft_file') {
+        ch_norm = VALIDATOR.out.assays
+    }
+
+    if(params.study_type != 'rnaseq') {
+        ch_matrix_for_differential = ch_norm
+    }
+    else{
+        ch_raw = VALIDATOR.out.assays
         ch_matrix_for_differential = ch_raw
     }

     // Split the contrasts up so we can run differential analyses and
-    // downstream plots separately.
+    // downstream plots separately.
     // Replace NA strings that might have snuck into the blocking column

     ch_contrasts = VALIDATOR.out.contrasts
@@ -247,32 +335,38 @@
         .join(CUSTOM_MATRIXFILTER.out.filtered) // -> meta, samplesheet, filtered matrix
         .first()

-    if (params.study_type == 'affy_array'){
+    if (params.study_type == 'affy_array' || params.study_type == 'geo_soft_file' || params.study_type == 'maxquant'){
         LIMMA_DIFFERENTIAL (
             ch_contrasts,
             ch_samples_and_matrix
         )
-        ch_differential = LIMMA_DIFFERENTIAL.out.results
-
+        ch_differential = LIMMA_DIFFERENTIAL.out.results
+
         ch_versions = ch_versions
             .mix(LIMMA_DIFFERENTIAL.out.versions)
-
+
         ch_processed_matrices = ch_norm
             .map{ it.tail() }
             .first()
     } else{

+        DESEQ2_NORM (
+            ch_contrasts.first(),
+            ch_samples_and_matrix,
+            ch_control_features
+        )
+
         // Run the DESeq differential module, which doesn't take the feature
-        // annotations
+        // annotations

         DESEQ2_DIFFERENTIAL (
             ch_contrasts,
             ch_samples_and_matrix,
             ch_control_features
         )
-
+
         // Let's make the simplifying assumption that the processed matrices from
         // the DESeq runs are the same across contrasts. We run the DESeq process
         // with matrices once for each contrast because DESeqDataSetFromMatrix()
@@ -280,15 +374,20 @@
         // blocking factors included differ. But the normalised and
         // variance-stabilised matrices are not (IIUC) impacted by the model.
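+        // Since DESEQ2_NORM above is fed ch_contrasts.first(), normalisation runs
+        // only once, and its outputs are reused for every contrast below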
-        ch_norm = DESEQ2_DIFFERENTIAL.out.normalised_counts.first()
-        ch_vst = DESEQ2_DIFFERENTIAL.out.vst_counts.first()
-        ch_differential = DESEQ2_DIFFERENTIAL.out.results
-
+        ch_norm = DESEQ2_NORM.out.normalised_counts
+        ch_differential = DESEQ2_DIFFERENTIAL.out.results
+
         ch_versions = ch_versions
             .mix(DESEQ2_DIFFERENTIAL.out.versions)
-
+
         ch_processed_matrices = ch_norm
-            .join(ch_vst)
+        if ('rlog' in params.deseq2_vs_method){
+            ch_processed_matrices = ch_processed_matrices.join(DESEQ2_NORM.out.rlog_counts)
+        }
+        if ('vst' in params.deseq2_vs_method){
+            ch_processed_matrices = ch_processed_matrices.join(DESEQ2_NORM.out.vst_counts)
+        }
+        ch_processed_matrices = ch_processed_matrices
             .map{ it.tail() }
     }

@@ -297,27 +396,27 @@
     // Currently, we're letting GSEA work on the expression data. In future we
     // will allow use of GSEA preranked instead, which will work with the fold
     // changes/ p values from DESeq2
-
-    if (params.gsea_run){
+
+    if (params.gsea_run){

         // For GSEA, we need to convert normalised counts to a GCT format for
         // input, and process the sample sheet to generate class definitions
-        // (CLS) for the variable used in each contrast
-
+        // (CLS) for the variable used in each contrast
+
         CUSTOM_TABULARTOGSEAGCT ( ch_norm )

         // TODO: update CUSTOM_TABULARTOGSEACLS for value channel input per new
-        // guidlines (rather than meta usage employed here)
-
+        // guidelines (rather than meta usage employed here)
+
         ch_contrasts_and_samples = ch_contrasts
             .map{it[0]} // revert back to contrasts meta map
             .combine( VALIDATOR.out.sample_meta.map { it[1] } )
-
-        CUSTOM_TABULARTOGSEACLS(ch_contrasts_and_samples)
+
+        CUSTOM_TABULARTOGSEACLS(ch_contrasts_and_samples)

         TABULAR_TO_GSEA_CHIP(
             VALIDATOR.out.feature_meta.map{ it[1] },
-            [params.features_id_col, params.features_name_col]
+            [params.features_id_col, params.features_name_col]
         )

         // The normalised matrix does not always have a contrast meta, so we
@@ -330,12 +429,12 @@
             .map{ tuple(it[1], it[0], it[2]) }
             .combine(ch_gene_sets)

-        GSEA_GSEA(
+        GSEA_GSEA(
             ch_gsea_inputs,
-            ch_gsea_inputs.map{ tuple(it[0].reference, it[0].target) }, // *
+            ch_gsea_inputs.map{ tuple(it[0].reference, it[0].target) }, // *
             TABULAR_TO_GSEA_CHIP.out.chip.first()
         )
-
+
         // * Note: GSEA module currently uses a value channel for the mandatory
         // non-file arguments used to define contrasts, hence the indicated
         // usage of map to perform that transformation. An active subject of
@@ -350,6 +449,7 @@
             .mix(GSEA_GSEA.out.versions)
     }

+
     // The exploratory plots are made by coloring by every unique variable used
     // to define contrasts

@@ -359,21 +459,23 @@
         }
         .unique()

-    ch_all_matrices = VALIDATOR.out.sample_meta // meta, samples
+    // For geoquery we've done no matrix processing and been supplied with the
+    // normalised matrix, which can be passed through to downstream analysis
+
+    if(params.study_type == "geo_soft_file") {
+        ch_mat = ch_norm
+    }else{
+        ch_mat = ch_raw.combine(ch_processed_matrices)
+    }
+
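+    // ch_all_matrices gathers everything the downstream steps need:
+    // [ meta, samples, features, [ matrix files... ] ]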
+    ch_all_matrices = VALIDATOR.out.sample_meta          // meta, samples
         .join(VALIDATOR.out.feature_meta)               // meta, samples, features
-        .join(ch_raw)                                   // meta, samples, features, raw matrix
-        .combine(ch_processed_matrices)                 // meta, samples, features, raw, norm, ...
+        .join(ch_mat)                                   // meta, samples, features, raw, norm (or just norm)
         .map{
             tuple(it[0], it[1], it[2], it[3..it.size()-1])
         }
         .first()
-
-    ch_contrast_variables
-        .combine(ch_all_matrices.map{ it.tail() })
-    ch_contrast_variables
-        .combine(ch_all_matrices.map{ it.tail() })
-
     PLOT_EXPLORATORY(
         ch_contrast_variables
             .combine(ch_all_matrices.map{ it.tail() })
@@ -382,7 +484,7 @@
     // Differential analysis using the results of DESeq2

     PLOT_DIFFERENTIAL(
-        ch_differential,
+        ch_differential,
         ch_all_matrices
     )
@@ -396,7 +498,7 @@
     CUSTOM_DUMPSOFTWAREVERSIONS (
         ch_versions.unique().collectFile(name: 'collated_versions.yml')
     )
-
+
     // Generate a list of files that will be used by the markdown report

     ch_report_file = Channel.from(report_file)
@@ -409,14 +511,14 @@
     ch_report_input_files = ch_all_matrices
         .map{ it.tail() }
         .map{it.flatten()}
-        .combine(ch_contrasts_file.map{it.tail()})
+        .combine(VALIDATOR.out.contrasts.map{it.tail()})
         .combine(CUSTOM_DUMPSOFTWAREVERSIONS.out.yml)
         .combine(ch_logo_file)
         .combine(ch_css_file)
         .combine(ch_citations_file)
         .combine(ch_differential.map{it[1]}.toList())
-
-    if (params.gsea_run){
+
+    if (params.gsea_run){
         ch_report_input_files = ch_report_input_files
             .combine(ch_gsea_results
                 .map{it.tail()}.flatMap().toList()
             )
     }

     if (params.shinyngs_build_app){
-
+
         // Make (and optionally deploy) the shinyngs app

         // Make a new contrasts file from the differential metas to guarantee the
@@ -441,41 +543,53 @@
             .combine(ch_differential.map{it[1]}.collect().map{[it]})

         SHINYNGS_APP(
-            ch_all_matrices, // meta, samples, features, [ matrices ]
-            ch_app_differential, // meta, contrasts, [differential results]
-            params.exploratory_assay_names.split(',').findIndexOf { it == params.exploratory_final_assay } + 1
-        )
+            ch_all_matrices,                            // meta, samples, features, [ matrices ]
+            ch_app_differential,                        // meta, contrasts, [differential results]
+            params.exploratory_assay_names.split(',').findIndexOf { it == params.exploratory_final_assay } + 1
+        )
         ch_versions = ch_versions.mix(SHINYNGS_APP.out.versions)
     }

     // Make a params list - starting with the input matrices and the relevant
     // params to use in reporting

-    def report_file_names = [ 'observations', 'features' ] +
+
+    def report_file_names = [ 'observations', 'features' ] +
         params.exploratory_assay_names.split(',').collect { "${it}_matrix".toString() } +
         [ 'contrasts_file', 'versions_file', 'logo', 'css', 'citations' ]

     // Condition params reported on study type

     def params_pattern = ~/^(report|study|observations|features|filtering|exploratory|differential|deseq2|gsea).*/
-    if (params.study_type == 'affy_array'){
+    if (params.study_type == 'affy_array' || params.study_type == 'geo_soft_file'){
         params_pattern = ~/^(report|study|observations|features|filtering|exploratory|differential|affy|limma|gsea).*/
-    }
+    }
+    if (params.study_type == 'maxquant'){
+        params_pattern = ~/^(report|study|observations|features|filtering|exploratory|differential|proteus|affy|limma|gsea).*/
+    }

     ch_report_params = ch_report_input_files
         .map{
             params.findAll{ k,v -> k.matches(params_pattern) } +
-            [report_file_names, it.collect{ f -> f.name}].transpose().collectEntries()
+            [report_file_names, it.collect{ f -> f.name}].transpose().collectEntries()
         }

     // Render the final report
-
+
     RMARKDOWNNOTEBOOK(
         ch_report_file,
         ch_report_params,
         ch_report_input_files
     )
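+    // RMARKDOWNNOTEBOOK emits a parameterised copy of the notebook, which is
+    // bundled together with its input files below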

+    // Make a report bundle comprising the markdown document and all necessary
+    // input files
+
+    MAKE_REPORT_BUNDLE(
+        RMARKDOWNNOTEBOOK.out.parameterised_notebook
+            .combine(ch_report_input_files)
+            .map{[it[0], it[1..-1]]}
+    )
+
 }

 /*
@@ -488,6 +602,7 @@ workflow.onComplete {
     if (params.email || params.email_on_fail) {
         NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report)
     }
+    NfcoreTemplate.dump_parameters(workflow, params)
     NfcoreTemplate.summary(workflow, params, log)
     if (params.hook_url) {
         NfcoreTemplate.IM_notification(workflow, params, summary_params, projectDir, log)