diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index b290e09..c8cbbcb 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -14,7 +14,11 @@
       },
 
       // Add the IDs of extensions you want installed when the container is created.
-      "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"]
+      "extensions": [
+        "ms-python.python",
+        "ms-python.vscode-pylance",
+        "nf-core.nf-core-extensionpack"
+      ]
     }
   }
 }
diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md
index dbc9754..782771b 100644
--- a/.github/CONTRIBUTING.md
+++ b/.github/CONTRIBUTING.md
@@ -16,7 +16,7 @@ If you'd like to write some code for mskcc/neoantigenpipeline, the standard work
 1. Check that there isn't already an issue about your idea in the [mskcc/neoantigenpipeline issues](https://github.com/mskcc/neoantigenpipeline/issues) to avoid duplicating work. If there isn't one already, please create one so that others know you're working on this
 2. [Fork](https://help.github.com/en/github/getting-started-with-github/fork-a-repo) the [mskcc/neoantigenpipeline repository](https://github.com/mskcc/neoantigenpipeline) to your GitHub account
 3. Make the necessary changes / additions within your forked repository following [Pipeline conventions](#pipeline-contribution-conventions)
-4. Use `nf-core schema build` and add any new parameters to the pipeline JSON schema (requires [nf-core tools](https://github.com/nf-core/tools) >= 1.10).
+4. Use `nf-core pipelines schema build` and add any new parameters to the pipeline JSON schema (requires [nf-core tools](https://github.com/nf-core/tools) >= 1.10).
 5. Submit a Pull Request against the `dev` branch and wait for the code to be reviewed and merged
 
 If you're not used to this workflow with git, you can start with some [docs from GitHub](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests) or even their [excellent `git` resources](https://try.github.io/).
@@ -37,7 +37,7 @@ There are typically two types of tests that run:
 ### Lint tests
 
 `nf-core` has a [set of guidelines](https://nf-co.re/developers/guidelines) which all pipelines must adhere to.
-To enforce these and ensure that all pipelines stay in sync, we have developed a helper tool which runs checks on the pipeline code. This is in the [nf-core/tools repository](https://github.com/nf-core/tools) and once installed can be run locally with the `nf-core lint <pipeline-directory>` command.
+To enforce these and ensure that all pipelines stay in sync, we have developed a helper tool which runs checks on the pipeline code. This is in the [nf-core/tools repository](https://github.com/nf-core/tools) and once installed can be run locally with the `nf-core pipelines lint <pipeline-directory>` command.
 
 If any failures or warnings are encountered, please follow the listed URL for more documentation.
 
@@ -68,7 +68,7 @@ If you wish to contribute a new step, please use the following coding standards:
 2. Write the process block (see below).
 3. Define the output channel if needed (see below).
 4. Add any new parameters to `nextflow.config` with a default (see below).
-5. Add any new parameters to `nextflow_schema.json` with help text (via the `nf-core schema build` tool).
+5. Add any new parameters to `nextflow_schema.json` with help text (via the `nf-core pipelines schema build` tool).
 6. Add sanity checks and validation for all relevant parameters.
 7. Perform local tests to validate that the new code works as expected.
 8. If applicable, add a new test command in `.github/workflow/ci.yml`.
@@ -79,11 +79,11 @@ If you wish to contribute a new step, please use the following coding standards:
 
 Parameters should be initialised / defined with default values in `nextflow.config` under the `params` scope.
 
-Once there, use `nf-core schema build` to add to `nextflow_schema.json`.
+Once there, use `nf-core pipelines schema build` to add to `nextflow_schema.json`.
 
 ### Default processes resource requirements
 
-Sensible defaults for process resource requirements (CPUs / memory / time) for a process should be defined in `conf/base.config`. These should generally be specified generic with `withLabel:` selectors so they can be shared across multiple processes/steps of the pipeline. A nf-core standard set of labels that should be followed where possible can be seen in the [nf-core pipeline template](https://github.com/nf-core/tools/blob/master/nf_core/pipeline-template/conf/base.config), which has the default process as a single core-process, and then different levels of multi-core configurations for increasingly large memory requirements defined with standardised labels.
+Sensible defaults for process resource requirements (CPUs / memory / time) for a process should be defined in `conf/base.config`. These should generally be specified generic with `withLabel:` selectors so they can be shared across multiple processes/steps of the pipeline. A nf-core standard set of labels that should be followed where possible can be seen in the [nf-core pipeline template](https://github.com/nf-core/tools/blob/main/nf_core/pipeline-template/conf/base.config), which has the default process as a single core-process, and then different levels of multi-core configurations for increasingly large memory requirements defined with standardised labels.
 
 The process resources can be passed on to the tool dynamically within the process with the `${task.cpus}` and `${task.memory}` variables in the `script:` block.
 
@@ -96,7 +96,7 @@ Please use the following naming schemes, to make it easy to understand what is g
 
 ### Nextflow version bumping
 
-If you are using a new feature from core Nextflow, you may bump the minimum required version of nextflow in the pipeline with: `nf-core bump-version --nextflow . [min-nf-version]`
+If you are using a new feature from core Nextflow, you may bump the minimum required version of nextflow in the pipeline with: `nf-core pipelines bump-version --nextflow . [min-nf-version]`
 
 ### Images and figures
diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml
index 9dbf64e..1661fcc 100644
--- a/.github/ISSUE_TEMPLATE/bug_report.yml
+++ b/.github/ISSUE_TEMPLATE/bug_report.yml
@@ -9,46 +9,34 @@ body:
       description: A clear and concise description of what the bug is.
     validations:
       required: true
+
   - type: textarea
     id: command_used
     attributes:
       label: Command used and terminal output
-      description: Steps to reproduce the behaviour. Please paste the command you used
-        to launch the pipeline and the output from your terminal.
+      description: Steps to reproduce the behaviour. Please paste the command you used to launch the pipeline and the output from your terminal.
       render: console
-      placeholder: "$ nextflow run ...
-
+      placeholder: |
+        $ nextflow run ...
         Some output where something broke
-        "
+
   - type: textarea
     id: files
     attributes:
       label: Relevant files
-      description: "Please drag and drop the relevant files here. Create a `.zip` archive
-        if the extension is not allowed.
-
-        Your verbose log file `.nextflow.log` is often useful _(this is a hidden file
-        in the directory where you launched the pipeline)_ as well as custom Nextflow
-        configuration files.
+      description: |
+        Please drag and drop the relevant files here. Create a `.zip` archive if the extension is not allowed.
+        Your verbose log file `.nextflow.log` is often useful _(this is a hidden file in the directory where you launched the pipeline)_ as well as custom Nextflow configuration files.
 
-        "
   - type: textarea
     id: system
     attributes:
       label: System information
-      description: "* Nextflow version _(eg. 23.04.0)_
-
+      description: |
+        * Nextflow version _(eg. 23.04.0)_
         * Hardware _(eg. HPC, Desktop, Cloud)_
-
         * Executor _(eg. slurm, local, awsbatch)_
-
-        * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter, Charliecloud,
-          or Apptainer)_
-
+        * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter, Charliecloud, or Apptainer)_
         * OS _(eg. CentOS Linux, macOS, Linux Mint)_
-
         * Version of mskcc/neoantigenpipeline _(eg. 1.1, 1.5, 1.8.2)_
-
-        "
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index 24a404c..2c8ffc5 100644
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -16,7 +16,7 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/mskcc/neoant
 - [ ] This comment contains a description of changes (with reason).
 - [ ] If you've fixed a bug or added code that should be tested, add tests!
 - [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/mskcc/neoantigenpipeline/tree/master/.github/CONTRIBUTING.md)
-- [ ] Make sure your code lints (`nf-core lint`).
+- [ ] Make sure your code lints (`nf-core pipelines lint`).
 - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir <OUTDIR>`).
 - [ ] Check for unexpected warnings in debug mode (`nextflow run . -profile debug,test,docker --outdir <OUTDIR>`).
 - [ ] Usage Documentation in `docs/usage.md` is updated.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 37175cf..e96b7cf 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -8,9 +8,12 @@ on:
     branches: ["dev", "main", "master"]
   release:
     types: [published]
+  workflow_dispatch:
 
 env:
   NXF_ANSI_LOG: false
+  NXF_SINGULARITY_CACHEDIR: ${{ github.workspace }}/.singularity
+  NXF_SINGULARITY_LIBRARYDIR: ${{ github.workspace }}/.singularity
 
 concurrency:
   group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}"
@@ -18,30 +21,61 @@ concurrency:
 
 jobs:
   test:
-    name: Run pipeline with test data
+    name: "Run pipeline with test data (${{ matrix.NXF_VER }} | ${{ matrix.test_name }} | ${{ matrix.profile }})"
     # Only run on push if this is the nf-core dev branch (merged PRs)
    if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'mskcc/neoantigenpipeline') }}"
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-24.04
     strategy:
       matrix:
         NXF_VER:
-          - "23.04.0"
+          - "24.04.0"
           - "latest-everything"
+        profile:
+          - "docker"
+        test_name:
+          - "test"
+        isMaster:
+          - ${{ github.base_ref == 'master' }}
+        exclude:
+          - isMaster: false
+            profile: "singularity"
     steps:
       - name: Check out pipeline code
-        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4
+        uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4
 
-      - name: Install Nextflow
-        uses: nf-core/setup-nextflow@v1
+      - name: Set up Nextflow
+        uses: nf-core/setup-nextflow@v2
         with:
           version: "${{ matrix.NXF_VER }}"
 
-      - name: Disk space cleanup
+      - name: Set up Apptainer
+        if: matrix.profile == 'singularity'
+        uses: eWaterCycle/setup-apptainer@main
+
+      - name: Set up Singularity
+        if: matrix.profile == 'singularity'
+        run: |
+          mkdir -p $NXF_SINGULARITY_CACHEDIR
+          mkdir -p $NXF_SINGULARITY_LIBRARYDIR
+
+      - name: Set up Miniconda
+        if: matrix.profile == 'conda'
+        uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3
+        with:
+          miniconda-version: "latest"
+          auto-update-conda: true
+          conda-solver: libmamba
+          channels: conda-forge,bioconda
+
+      - name: Set up Conda
+        if: matrix.profile == 'conda'
+        run: |
+          echo $(realpath $CONDA)/condabin >> $GITHUB_PATH
+          echo $(realpath python) >> $GITHUB_PATH
+
+      - name: Clean up Disk space
         uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
 
-      - name: Run pipeline with test data
-        # TODO nf-core: You can customise CI pipeline run tests as required
-        # For example: adding multiple test runs with different parameters
-        # Remember that you can parallelise this by using strategy.matrix
+      - name: "Run pipeline with test data ${{ matrix.NXF_VER }} | ${{ matrix.test_name }} | ${{ matrix.profile }}"
         run: |
-          nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results
+          nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.test_name }},${{ matrix.profile }} --outdir ./results
diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml
index 08622fd..713dc3e 100644
--- a/.github/workflows/download_pipeline.yml
+++ b/.github/workflows/download_pipeline.yml
@@ -1,4 +1,4 @@
-name: Test successful pipeline download with 'nf-core download'
+name: Test successful pipeline download with 'nf-core pipelines download'
 
 # Run the workflow when:
 #  - dispatched manually
@@ -8,12 +8,14 @@ on:
   workflow_dispatch:
     inputs:
       testbranch:
-        description: "The specific branch you wish to utilize for the test execution of nf-core download."
+ description: "The specific branch you wish to utilize for the test execution of nf-core pipelines download." required: true default: "dev" pull_request: types: - opened + - edited + - synchronize branches: - master pull_request_target: @@ -28,15 +30,20 @@ jobs: runs-on: ubuntu-latest steps: - name: Install Nextflow - uses: nf-core/setup-nextflow@v1 + uses: nf-core/setup-nextflow@v2 - - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 + - name: Disk space cleanup + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 + + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 with: - python-version: "3.11" + python-version: "3.12" architecture: "x64" - - uses: eWaterCycle/setup-singularity@931d4e31109e875b13309ae1d07c70ca8fbc8537 # v7 + + - name: Setup Apptainer + uses: eWaterCycle/setup-apptainer@4bb22c52d4f63406c49e94c804632975787312b3 # v2.0.0 with: - singularity-version: 3.8.3 + apptainer-version: 1.3.4 - name: Install dependencies run: | @@ -49,24 +56,64 @@ jobs: echo "REPOTITLE_LOWERCASE=$(basename ${GITHUB_REPOSITORY,,})" >> ${GITHUB_ENV} echo "REPO_BRANCH=${{ github.event.inputs.testbranch || 'dev' }}" >> ${GITHUB_ENV} + - name: Make a cache directory for the container images + run: | + mkdir -p ./singularity_container_images + - name: Download the pipeline env: - NXF_SINGULARITY_CACHEDIR: ./ + NXF_SINGULARITY_CACHEDIR: ./singularity_container_images run: | - nf-core download ${{ env.REPO_LOWERCASE }} \ + nf-core pipelines download ${{ env.REPO_LOWERCASE }} \ --revision ${{ env.REPO_BRANCH }} \ --outdir ./${{ env.REPOTITLE_LOWERCASE }} \ --compress "none" \ --container-system 'singularity' \ - --container-library "quay.io" -l "docker.io" -l "ghcr.io" \ + --container-library "quay.io" -l "docker.io" -l "community.wave.seqera.io" \ --container-cache-utilisation 'amend' \ - --download-configuration + --download-configuration 'yes' - name: Inspect download run: tree ./${{ env.REPOTITLE_LOWERCASE }} - - name: Run the downloaded pipeline + - name: Count the downloaded number of container images + id: count_initial + run: | + image_count=$(ls -1 ./singularity_container_images | wc -l | xargs) + echo "Initial container image count: $image_count" + echo "IMAGE_COUNT_INITIAL=$image_count" >> ${GITHUB_ENV} + + - name: Run the downloaded pipeline (stub) + id: stub_run_pipeline + continue-on-error: true env: - NXF_SINGULARITY_CACHEDIR: ./ + NXF_SINGULARITY_CACHEDIR: ./singularity_container_images NXF_SINGULARITY_HOME_MOUNT: true run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -stub -profile test,singularity --outdir ./results + - name: Run the downloaded pipeline (stub run not supported) + id: run_pipeline + if: ${{ job.steps.stub_run_pipeline.status == failure() }} + env: + NXF_SINGULARITY_CACHEDIR: ./singularity_container_images + NXF_SINGULARITY_HOME_MOUNT: true + run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -profile test,singularity --outdir ./results + + - name: Count the downloaded number of container images + id: count_afterwards + run: | + image_count=$(ls -1 ./singularity_container_images | wc -l | xargs) + echo "Post-pipeline run container image count: $image_count" + echo "IMAGE_COUNT_AFTER=$image_count" >> ${GITHUB_ENV} + + - name: Compare container image counts + run: | + if [ "${{ env.IMAGE_COUNT_INITIAL }}" -ne "${{ env.IMAGE_COUNT_AFTER }}" ]; then + initial_count=${{ env.IMAGE_COUNT_INITIAL }} + 
final_count=${{ env.IMAGE_COUNT_AFTER }} + difference=$((final_count - initial_count)) + echo "$difference additional container images were \n downloaded at runtime . The pipeline has no support for offline runs!" + tree ./singularity_container_images + exit 1 + else + echo "The pipeline can be downloaded successfully!" + fi diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix-linting.yml index d2b130e..1087b5c 100644 --- a/.github/workflows/fix-linting.yml +++ b/.github/workflows/fix-linting.yml @@ -13,7 +13,7 @@ jobs: runs-on: ubuntu-latest steps: # Use the @nf-core-bot token to check out so we can push later - - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 with: token: ${{ secrets.nf_core_bot_auth_token }} @@ -32,9 +32,9 @@ jobs: GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }} # Install and run pre-commit - - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 with: - python-version: 3.11 + python-version: "3.12" - name: Install pre-commit run: pip install pre-commit diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 1fcafe8..a502573 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -1,6 +1,6 @@ name: nf-core linting # This workflow is triggered on pushes and PRs to the repository. -# It runs the `nf-core lint` and markdown lint tests to ensure +# It runs the `nf-core pipelines lint` and markdown lint tests to ensure # that the code meets the nf-core guidelines. on: push: @@ -41,17 +41,32 @@ jobs: python-version: "3.12" architecture: "x64" + - name: read .nf-core.yml + uses: pietrobolcato/action-read-yaml@1.1.0 + id: read_yml + with: + config: ${{ github.workspace }}/.nf-core.yml + - name: Install dependencies run: | python -m pip install --upgrade pip - pip install nf-core + pip install nf-core==${{ steps.read_yml.outputs['nf_core_version'] }} + + - name: Run nf-core pipelines lint + if: ${{ github.base_ref != 'master' }} + env: + GITHUB_COMMENTS_URL: ${{ github.event.pull_request.comments_url }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_PR_COMMIT: ${{ github.event.pull_request.head.sha }} + run: nf-core -l lint_log.txt pipelines lint --dir ${GITHUB_WORKSPACE} --markdown lint_results.md - - name: Run nf-core lint + - name: Run nf-core pipelines lint --release + if: ${{ github.base_ref == 'master' }} env: GITHUB_COMMENTS_URL: ${{ github.event.pull_request.comments_url }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_PR_COMMIT: ${{ github.event.pull_request.head.sha }} - run: nf-core -l lint_log.txt lint --dir ${GITHUB_WORKSPACE} --markdown lint_results.md + run: nf-core -l lint_log.txt pipelines lint --release --dir ${GITHUB_WORKSPACE} --markdown lint_results.md - name: Save PR number if: ${{ always() }} diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index 40acc23..42e519b 100644 --- a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Download lint results - uses: dawidd6/action-download-artifact@09f2f74827fd3a8607589e5ad7f9398816f540fe # v3 + uses: dawidd6/action-download-artifact@bf251b5aa9c2f7eeb574a96ee720e24f801b7c11 # v6 with: workflow: linting.yml workflow_conclusion: completed diff --git a/.github/workflows/release-announcements.yml 
deleted file mode 100644
index d468aea..0000000
--- a/.github/workflows/release-announcements.yml
+++ /dev/null
@@ -1,75 +0,0 @@
-name: release-announcements
-# Automatic release toot and tweet anouncements
-on:
-  release:
-    types: [published]
-  workflow_dispatch:
-
-jobs:
-  toot:
-    runs-on: ubuntu-latest
-    steps:
-      - name: get topics and convert to hashtags
-        id: get_topics
-        run: |
-          curl -s https://nf-co.re/pipelines.json | jq -r '.remote_workflows[] | select(.full_name == "${{ github.repository }}") | .topics[]' | awk '{print "#"$0}' | tr '\n' ' ' >> $GITHUB_OUTPUT
-
-      - uses: rzr/fediverse-action@master
-        with:
-          access-token: ${{ secrets.MASTODON_ACCESS_TOKEN }}
-          host: "mstdn.science" # custom host if not "mastodon.social" (default)
-          # GitHub event payload
-          # https://docs.github.com/en/developers/webhooks-and-events/webhooks/webhook-events-and-payloads#release
-          message: |
-            Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}!
-
-            Please see the changelog: ${{ github.event.release.html_url }}
-
-            ${{ steps.get_topics.outputs.GITHUB_OUTPUT }} #nfcore #openscience #nextflow #bioinformatics
-
-  send-tweet:
-    runs-on: ubuntu-latest
-
-    steps:
-      - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5
-        with:
-          python-version: "3.10"
-      - name: Install dependencies
-        run: pip install tweepy==4.14.0
-      - name: Send tweet
-        shell: python
-        run: |
-          import os
-          import tweepy
-
-          client = tweepy.Client(
-              access_token=os.getenv("TWITTER_ACCESS_TOKEN"),
-              access_token_secret=os.getenv("TWITTER_ACCESS_TOKEN_SECRET"),
-              consumer_key=os.getenv("TWITTER_CONSUMER_KEY"),
-              consumer_secret=os.getenv("TWITTER_CONSUMER_SECRET"),
-          )
-          tweet = os.getenv("TWEET")
-          client.create_tweet(text=tweet)
-        env:
-          TWEET: |
-            Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}!
-
-            Please see the changelog: ${{ github.event.release.html_url }}
-          TWITTER_CONSUMER_KEY: ${{ secrets.TWITTER_CONSUMER_KEY }}
-          TWITTER_CONSUMER_SECRET: ${{ secrets.TWITTER_CONSUMER_SECRET }}
-          TWITTER_ACCESS_TOKEN: ${{ secrets.TWITTER_ACCESS_TOKEN }}
-          TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }}
-
-  bsky-post:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: zentered/bluesky-post-action@80dbe0a7697de18c15ad22f4619919ceb5ccf597 # v0.1.0
-        with:
-          post: |
-            Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}!
-
-            Please see the changelog: ${{ github.event.release.html_url }}
-        env:
-          BSKY_IDENTIFIER: ${{ secrets.BSKY_IDENTIFIER }}
-          BSKY_PASSWORD: ${{ secrets.BSKY_PASSWORD }}
-          #
diff --git a/.github/workflows/template_version_comment.yml b/.github/workflows/template_version_comment.yml
new file mode 100644
index 0000000..e8aafe4
--- /dev/null
+++ b/.github/workflows/template_version_comment.yml
@@ -0,0 +1,46 @@
+name: nf-core template version comment
+# This workflow is triggered on PRs to check if the pipeline template version matches the latest nf-core version.
+# It posts a comment to the PR, even if it comes from a fork.
+
+on: pull_request_target
+
+jobs:
+  template_version:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Check out pipeline code
+        uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4
+        with:
+          ref: ${{ github.event.pull_request.head.sha }}
+
+      - name: Read template version from .nf-core.yml
+        uses: nichmor/minimal-read-yaml@v0.0.2
+        id: read_yml
+        with:
+          config: ${{ github.workspace }}/.nf-core.yml
+
+      - name: Install nf-core
+        run: |
+          python -m pip install --upgrade pip
+          pip install nf-core==${{ steps.read_yml.outputs['nf_core_version'] }}
+
+      - name: Check nf-core outdated
+        id: nf_core_outdated
+        run: echo "OUTPUT=$(pip list --outdated | grep nf-core)" >> ${GITHUB_ENV}
+
+      - name: Post nf-core template version comment
+        uses: mshick/add-pr-comment@b8f338c590a895d50bcbfa6c5859251edc8952fc # v2
+        if: |
+          contains(env.OUTPUT, 'nf-core')
+        with:
+          repo-token: ${{ secrets.NF_CORE_BOT_AUTH_TOKEN }}
+          allow-repeats: false
+          message: |
+            > [!WARNING]
+            > Newer version of the nf-core template is available.
+            >
+            > Your pipeline is using an old version of the nf-core template: ${{ steps.read_yml.outputs['nf_core_version'] }}.
+            > Please update your pipeline to the latest version.
+            >
+            > For more documentation on how to update your pipeline, please see the [nf-core documentation](https://github.com/nf-core/tools?tab=readme-ov-file#sync-a-pipeline-with-the-template) and [Synchronisation documentation](https://nf-co.re/docs/contributing/sync).
+          #
diff --git a/.gitignore b/.gitignore
index 5124c9a..a42ce01 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,3 +6,4 @@ results/
 testing/
 testing*
 *.pyc
+null/
diff --git a/.gitpod.yml b/.gitpod.yml
index 105a182..4611863 100644
--- a/.gitpod.yml
+++ b/.gitpod.yml
@@ -4,17 +4,14 @@ tasks:
     command: |
       pre-commit install --install-hooks
       nextflow self-update
-  - name: unset JAVA_TOOL_OPTIONS
-    command: |
-      unset JAVA_TOOL_OPTIONS
 
 vscode:
   extensions: # based on nf-core.nf-core-extensionpack
-    - esbenp.prettier-vscode # Markdown/CommonMark linting and style checking for Visual Studio Code
+    #- esbenp.prettier-vscode # Markdown/CommonMark linting and style checking for Visual Studio Code
     - EditorConfig.EditorConfig # override user/workspace settings with settings found in .editorconfig files
     - Gruntfuggly.todo-tree # Display TODO and FIXME in a tree view in the activity bar
     - mechatroner.rainbow-csv # Highlight columns in csv files in different colors
-    # - nextflow.nextflow # Nextflow syntax highlighting
+    - nextflow.nextflow # Nextflow syntax highlighting
     - oderwat.indent-rainbow # Highlight indentation level
     - streetsidesoftware.code-spell-checker # Spelling checker for source code
     - charliermarsh.ruff # Code linter Ruff
diff --git a/.nf-core.yml b/.nf-core.yml
index 0374a1a..b40c4f4 100644
--- a/.nf-core.yml
+++ b/.nf-core.yml
@@ -1,4 +1,4 @@
-nf_core_version: 2.14.1
+bump_version: null
 lint:
   files_exist:
     - CODE_OF_CONDUCT.md
@@ -16,10 +16,19 @@ lint:
     - .github/ISSUE_TEMPLATE/bug_report.yml
   multiqc_config:
     - report_comment
-  nextflow_config:
-    - manifest.name
-    - manifest.homePage
+  nextflow_config: false
+  included_configs: false
+nf_core_version: 3.0.2
+org_path: msk
 repository_type: pipeline
 template:
-  prefix: mskcc
-  skip: []
+  author: Nikhil Kumar
+  description: Pipeline for computing neoantigen qualities from DNA and RNA-Seq data
+  force: false
+  is_nfcore: false
+  name: neoantigenpipeline
+  org: mskcc
+  outdir: .
+  skip_features: []
+  version: 1.0.0
+update: null
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index af57081..9e9f0e1 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -3,8 +3,11 @@ repos:
     rev: "v3.1.0"
     hooks:
       - id: prettier
+        additional_dependencies:
+          - prettier@3.2.5
+
   - repo: https://github.com/editorconfig-checker/editorconfig-checker.python
-    rev: "2.7.3"
+    rev: "3.0.3"
    hooks:
       - id: editorconfig-checker
         alias: ec
diff --git a/.prettierignore b/.prettierignore
index 191016c..94f1b1b 100644
--- a/.prettierignore
+++ b/.prettierignore
@@ -11,3 +11,6 @@ testing*
 *.pyc
 bin/
 README.md
+modules/nf-core/*
+.github/*
+subworkflows/nf-core/*
diff --git a/.prettierrc b/.prettierrc
new file mode 100644
index 0000000..0967ef4
--- /dev/null
+++ b/.prettierrc
@@ -0,0 +1 @@
+{}
diff --git a/README.md b/README.md
index 2a6977b..e0ff542 100644
--- a/README.md
+++ b/README.md
@@ -1,10 +1,14 @@
-> [![GitHub Actions CI Status](https://github.com/mskcc/neoantigen-pipeline/actions/workflows/ci.yml/badge.svg)](https://github.com/mskcc/neoantigen-pipeline/actions/workflows/ci.yml)
-> [![GitHub Actions Linting Status](https://github.com/mskcc/neoantigen-pipeline/actions/workflows/linting.yml/badge.svg)](https://github.com/mskcc/neoantigen-pipeline/actions/workflows/linting.yml)
-> [![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)
+# mskcc/neoantigenpipeline
 
-[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A523.04.0-23aa62.svg)](https://www.nextflow.io/)
+[![GitHub Actions CI Status](https://github.com/mskcc/neoantigenpipeline/actions/workflows/ci.yml/badge.svg)](https://github.com/mskcc/neoantigenpipeline/actions/workflows/ci.yml)
+[![GitHub Actions Linting Status](https://github.com/mskcc/neoantigenpipeline/actions/workflows/linting.yml/badge.svg)](https://github.com/mskcc/neoantigenpipeline/actions/workflows/linting.yml)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)
+[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)
+
+[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.0-23aa62.svg)](https://www.nextflow.io/)
 [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)
 [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)
 [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)
-[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/mskcc/neoantigen-pipeline)
+[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/mskcc/neoantigenpipeline)
 
 ## Introduction
 
@@ -23,9 +27,6 @@
 > [!NOTE]
 > If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.
 
-
-
 ```bash
 nextflow run mskcc/neoantigenpipeline \
    -profile prod,<docker/singularity/.../institute> \
    --input samplesheet.csv \
    --outdir <OUTDIR>
 ```
 
@@ -49,8 +48,7 @@ nextflow run mskcc/neoantigenpipeline \
 ```
 
 > [!WARNING]
-> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_;
-> see [docs](https://nf-co.re/usage/configuration#custom-configuration-files).
+> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).
 
 ## Credits
 
@@ -78,7 +76,7 @@ If you would like to contribute to this pipeline, please see the [contributing g
 
 An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.
 
-This pipeline uses code and infrastructure developed and maintained by the [nf-core](https://nf-co.re) community, reused here under the [MIT license](https://github.com/nf-core/tools/blob/master/LICENSE).
+This pipeline uses code and infrastructure developed and maintained by the [nf-core](https://nf-co.re) community, reused here under the [MIT license](https://github.com/nf-core/tools/blob/main/LICENSE).
 
 > **The nf-core framework for community-curated bioinformatics pipelines.**
 >
diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml
index 07855f8..375d380 100644
--- a/assets/multiqc_config.yml
+++ b/assets/multiqc_config.yml
@@ -1,8 +1,6 @@
 report_comment: >
-  This report has been generated by the mskcc/neoantigenpipeline analysis pipeline.
-
 report_section_order:
   "mskcc-neoantigenpipeline-methods-description":
     order: -1000
diff --git a/assets/schema_input.json b/assets/schema_input.json
index 27bd597..2bb6eb6 100644
--- a/assets/schema_input.json
+++ b/assets/schema_input.json
@@ -1,5 +1,5 @@
 {
-    "$schema": "http://json-schema.org/draft-07/schema",
+    "$schema": "https://json-schema.org/draft/2020-12/schema",
     "$id": "https://raw.githubusercontent.com/mskcc/neoantigenpipeline/master/assets/schema_input.json",
     "title": "mskcc/neoantigenpipeline pipeline - params.input schema",
     "description": "Schema for the file provided with params.input",
@@ -31,7 +31,7 @@
                 "type": "string",
                 "format": "file-path",
                 "exists": true,
-                "pattern": "winners.hla.txt$",
+                "pattern": "^\\S+.txt$",
                 "errorMessage": "HLA_file output by polysolver."
             }
         },
diff --git a/assets/workflow_diagram.png b/assets/workflow_diagram.png
index 29dfc30..a0693ed 100644
Binary files a/assets/workflow_diagram.png and b/assets/workflow_diagram.png differ
diff --git a/conf/base.config b/conf/base.config
index 96257e7..dddd807 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -9,11 +9,10 @@
 */
 
 process {
-    // TODO nf-core: Check the defaults for all processes
-    cpus   = { check_max( 1    * task.attempt, 'cpus'   ) }
-    memory = { check_max( 6.GB * task.attempt, 'memory' ) }
-    time   = { check_max( 4.h  * task.attempt, 'time'   ) }
+    cpus   = { 1    * task.attempt }
+    memory = { 6.GB * task.attempt }
+    time   = { 4.h  * task.attempt }
 
     errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' }
     maxRetries    = 1
@@ -27,30 +26,30 @@ process {
     // TODO nf-core: Customise requirements for specific processes.
     // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors
     withLabel:process_single {
-        cpus   = { check_max( 1                  , 'cpus'    ) }
-        memory = { check_max( 6.GB * task.attempt, 'memory'  ) }
-        time   = { check_max( 4.h  * task.attempt, 'time'    ) }
+        cpus   = { 1 }
+        memory = { 6.GB * task.attempt }
+        time   = { 4.h  * task.attempt }
     }
     withLabel:process_low {
-        cpus   = { check_max( 2     * task.attempt, 'cpus'    ) }
-        memory = { check_max( 12.GB * task.attempt, 'memory'  ) }
-        time   = { check_max( 4.h   * task.attempt, 'time'    ) }
+        cpus   = { 2     * task.attempt }
+        memory = { 12.GB * task.attempt }
+        time   = { 4.h   * task.attempt }
     }
     withLabel:process_medium {
-        cpus   = { check_max( 6     * task.attempt, 'cpus'    ) }
-        memory = { check_max( 36.GB * task.attempt, 'memory'  ) }
-        time   = { check_max( 8.h   * task.attempt, 'time'    ) }
+        cpus   = { 6     * task.attempt }
+        memory = { 36.GB * task.attempt }
+        time   = { 8.h   * task.attempt }
     }
     withLabel:process_high {
-        cpus   = { check_max( 12    * task.attempt, 'cpus'    ) }
-        memory = { check_max( 72.GB * task.attempt, 'memory'  ) }
-        time   = { check_max( 72.h  * task.attempt, 'time'    ) }
+        cpus   = { 12    * task.attempt }
+        memory = { 72.GB * task.attempt }
+        time   = { 72.h  * task.attempt }
     }
     withLabel:process_long {
-        time   = { check_max( 20.h  * task.attempt, 'time'    ) }
+        time   = { 20.h  * task.attempt }
     }
     withLabel:process_high_memory {
-        memory = { check_max( 200.GB * task.attempt, 'memory' ) }
+        memory = { 200.GB * task.attempt }
     }
     withLabel:error_ignore {
         errorStrategy = 'ignore'
diff --git a/conf/igenomes_ignored.config b/conf/igenomes_ignored.config
new file mode 100644
index 0000000..b4034d8
--- /dev/null
+++ b/conf/igenomes_ignored.config
@@ -0,0 +1,9 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Nextflow config file for iGenomes paths
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Empty genomes dictionary to use when igenomes is ignored.
+----------------------------------------------------------------------------------------
+*/
+
+params.genomes = [:]
diff --git a/conf/modules.config b/conf/modules.config
index ca2d9da..3d7fb26 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -11,7 +11,6 @@
 */
 
 process {
-
     publishDir = [
         path: { "${params.outdir}/${task.tag}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" },
         mode: params.publish_dir_mode,
@@ -22,12 +21,11 @@ process {
         ext.args = '-f facets'
     }
 
-
     withName: 'PHYLOWGS_MULTIEVOLVE' {
-        ext.args = "--burnin-samples ${params.phylo_burnin_samples} --mcmc-samples ${params.phylo_mcmc_samples}"
+        ext.args = "--burnin-samples ${params.phylo_burnin_samples} --mcmc-samples ${params.phylo_mcmc_samples} --num-chains ${params.phylo_num_chains}"
     }
 
     withName: 'PHYLOWGS_WRITERESULTS' {
-        ext.args = "--max-multiprimary 1.0"
+        ext.args = '--max-multiprimary 1.0'
     }
 }
diff --git a/conf/prod.config b/conf/prod.config
index 57ca622..ebee506 100644
--- a/conf/prod.config
+++ b/conf/prod.config
@@ -18,22 +18,26 @@ process {
     executor = 'lsf'
     queueSize = 500
     perJobMemLimit = true
+    resourceLimits = [
+        cpus: 5,
+        memory: '10.GB',
+        time: '100.h'
+    ]
 }
 
 params {
     config_profile_name        = 'Prod profile'
     config_profile_description = 'Minimal conf to run the pipeline'
 
-    // Limit resources so that this can run on GitHub Actions
-    max_cpus   = 5
-    max_memory = '10.GB'
-    max_time   = '100.h'
+    netmhc3 = true
 
     // Genome references
     genome = 'GRCh37'
     phylo_burnin_samples = 1000
     phylo_mcmc_samples = 2500
-    iedbfasta = 'https://raw.githubusercontent.com/mskcc-omics-workflows/test-datasets/neoantigen/neoantigen/neoantigenEditing/data/iedb.fasta'
+    phylo_num_chains = 15
+
+    iedbfasta = 'https://raw.githubusercontent.com/mskcc/NeoantigenEditing/refs/heads/main/data/iedb.fasta'
     cds = 'https://github.com/mskcc-omics-workflows/test-datasets/raw/neoantigen/neoantigen/Homo_sapiens.GRCh37.75.cds.all.fa.gz'
     cdna = 'https://github.com/mskcc-omics-workflows/test-datasets/raw/neoantigen/neoantigen/Homo_sapiens.GRCh37.75.cdna.all.fa.gz'
 }
diff --git a/conf/test.config b/conf/test.config
index 548f4ae..89f39d1 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -11,16 +11,18 @@
 */
 
 nextflow.enable.moduleBinaries = true
+process {
+    resourceLimits = [
+        cpus: 4,
+        memory: '15.GB',
+        time: '1.h'
+    ]
+}
 
 params {
     config_profile_name        = 'Test profile'
     config_profile_description = 'Minimal test dataset to check pipeline function'
 
-    // Limit resources so that this can run on GitHub Actions
-    max_cpus   = 2
-    max_memory = '6.GB'
-    max_time   = '6.h'
-
     // Input data
     input = "${projectDir}/assets/samplesheet.csv"
 
@@ -28,6 +30,8 @@ params {
     genome = 'GRCh37'
     phylo_burnin_samples = 2
     phylo_mcmc_samples = 2
+    phylo_num_chains = 2
+    netmhc3 = true
 
     iedbfasta = 'https://raw.githubusercontent.com/mskcc-omics-workflows/test-datasets/neoantigen/neoantigen/neoantigenEditing/data/iedb.fasta'
     cds = 'https://github.com/mskcc-omics-workflows/test-datasets/raw/neoantigen/neoantigen/Homo_sapiens.GRCh37.75.cds.all.fa.gz'
diff --git a/conf/test_full.config b/conf/test_full.config
index 51b5fcc..021ed6a 100644
--- a/conf/test_full.config
+++ b/conf/test_full.config
@@ -17,7 +17,7 @@ params {
 
     // Input data for full size test
     // TODO nf-core: Specify the paths to your full test data ( on nf-core/test-datasets or directly in repositories, e.g. SRA)
     // TODO nf-core: Give any required params for the test so that command line flags are not needed
-    input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_full_illumina_amplicon.csv'
+    input = params.pipelines_testdata_base_path + 'viralrecon/samplesheet/samplesheet_full_illumina_amplicon.csv'
 
     // Genome references
     genome = 'R64-1-1'
diff --git a/docs/usage.md b/docs/usage.md
index e8f5eda..e0fa82c 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -60,9 +60,9 @@ The above pipeline run specified with a params file in yaml format:
 nextflow run mskcc/neoantigenpipeline -profile docker -params-file params.yaml
 ```
 
-with `params.yaml` containing:
+with:
 
-```yaml
+```yaml title="params.yaml"
 input: './samplesheet.csv'
 outdir: './results/'
 <...>
@@ -118,11 +118,25 @@ If `-profile` is not specified, the pipeline will run locally and expect all sof
 - `test`
   - A profile with a complete configuration for automated testing
   - Includes links to test data so needs no other parameters
+- `prod`
+  - A profile with a complete configuration for running on production
+  - Includes links to pipeline resources
 - `docker`
   - A generic configuration profile to be used with [Docker](https://docker.com/)
 - `singularity`
   - A generic configuration profile to be used with [Singularity](https://sylabs.io/docs/)
-
-
+- `podman`
+  - A generic configuration profile to be used with [Podman](https://podman.io/)
+- `shifter`
+  - A generic configuration profile to be used with [Shifter](https://nersc.gitlab.io/development/shifter/how-to-use/)
+- `charliecloud`
+  - A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/)
+- `apptainer`
+  - A generic configuration profile to be used with [Apptainer](https://apptainer.org/)
+- `wave`
+  - A generic configuration profile to enable [Wave](https://seqera.io/wave/) containers. Use together with one of the above (requires Nextflow `24.03.0-edge` or later).
+- `conda`
+  - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter, Charliecloud, or Apptainer.
 
 ### `-resume`
 
@@ -162,14 +176,6 @@ See the main [Nextflow documentation](https://www.nextflow.io/docs/latest/config
 
 If you have any questions or issues please send us a message on [Slack](https://nf-co.re/join/slack) on the [`#configs` channel](https://nfcore.slack.com/channels/configs).
 
-## Azure Resource Requests
-
-To be used with the `azurebatch` profile by specifying the `-profile azurebatch`.
-We recommend providing a compute `params.vm_type` of `Standard_D16_v3` VMs by default but these options can be changed if required.
-
-Note that the choice of VM size depends on your quota and the overall workload during the analysis.
-For a thorough list, please refer the [Azure Sizes for virtual machines in Azure](https://docs.microsoft.com/en-us/azure/virtual-machines/sizes).
-
 ## Running in the background
 
 Nextflow handles job submissions and supervises the running jobs. The Nextflow process must run until the pipeline is finished.
diff --git a/main.nf b/main.nf
index c9d5b36..003aa36 100644
--- a/main.nf
+++ b/main.nf
@@ -7,8 +7,6 @@
 ----------------------------------------------------------------------------------------
 */
 
-nextflow.enable.dsl = 2
-
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS
@@ -18,7 +16,6 @@
 include { NEOANTIGENPIPELINE } from './workflows/neoantigenpipeline'
 include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_neoantigenpipeline_pipeline'
 include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_neoantigenpipeline_pipeline'
-
 include { getGenomeAttribute } from './subworkflows/local/utils_nfcore_neoantigenpipeline_pipeline'
 
 /*
@@ -54,9 +51,9 @@ workflow MSKCC_NEOANTIGENPIPELINE {
     NEOANTIGENPIPELINE (
         samplesheet
     )
-
     emit:
-    out = NEOANTIGENPIPELINE.out.neo_out // channel: /path/to/multiqc_report.html
+    out = NEOANTIGENPIPELINE.out.neo_out
+    tsv = NEOANTIGENPIPELINE.out.tsv_out
 }
 
 /*
@@ -68,13 +65,11 @@ workflow MSKCC_NEOANTIGENPIPELINE {
 
 workflow {
     main:
-
     //
     // SUBWORKFLOW: Run initialisation tasks
     //
     PIPELINE_INITIALISATION (
         params.version,
-        params.help,
         params.validate_params,
         params.monochrome_logs,
         args,
@@ -88,7 +83,6 @@ workflow {
     MSKCC_NEOANTIGENPIPELINE (
         PIPELINE_INITIALISATION.out.samplesheet
     )
-
     //
     // SUBWORKFLOW: Run completion tasks
     //
diff --git a/modules.json b/modules.json
index bf1a62f..107a356 100644
--- a/modules.json
+++ b/modules.json
@@ -7,62 +7,77 @@
             "msk": {
                 "neoantigenediting/aligntoiedb": {
                     "branch": "develop",
-                    "git_sha": "cac9c047e374ee259fb612ba5816e7e6aae6b86f",
-                    "installed_by": ["neoantigen_editing"]
+                    "git_sha": "34505c4c67eabebab927561d3a8fca87c9efe788",
+                    "installed_by": ["modules", "neoantigen_editing"]
                },
                 "neoantigenediting/computefitness": {
                     "branch": "develop",
-                    "git_sha": "1f65c2ecdc5010549055ff7f4e6b8bccee48d4ae",
-                    "installed_by": ["neoantigen_editing"]
+                    "git_sha": "05e49188ee9407e1b51dfb1a49d8b6133e9276bc",
+                    "installed_by": ["modules", "neoantigen_editing"]
+                },
+                "neoantigenutils/convertannotjson": {
+                    "branch": "develop",
+                    "git_sha": "34505c4c67eabebab927561d3a8fca87c9efe788",
+                    "installed_by": ["modules"]
                 },
                 "neoantigenutils/formatnetmhcpan": {
                     "branch": "develop",
-                    "git_sha": "c5d1252252e15555abcc82ea537cebeb281a1856",
-                    "installed_by": ["netmhcstabandpan"]
+                    "git_sha": "e6bbb12a2dc237b9ea18163e96dbe9d780ddce5f",
+                    "installed_by": ["modules", "netmhcstabandpan"]
                 },
                 "neoantigenutils/generatehlastring": {
                     "branch": "develop",
-                    "git_sha": "33f0bd33095fa15016ee24f4fb4d61e896dbb970",
-                    "installed_by": ["netmhcstabandpan"]
+                    "git_sha": "34505c4c67eabebab927561d3a8fca87c9efe788",
+                    "installed_by": ["modules", "netmhcstabandpan"]
                 },
                 "neoantigenutils/generatemutfasta": {
                     "branch": "develop",
-                    "git_sha": "bb7975c796ab9a2d7a45ef733a6a226a0f5ad74a",
-                    "installed_by": ["netmhcstabandpan"]
+                    "git_sha": "e6bbb12a2dc237b9ea18163e96dbe9d780ddce5f",
+                    "installed_by": ["modules", "netmhcstabandpan"]
                 },
                 "neoantigenutils/neoantigeninput": {
-                    "branch": "neoantigen",
-                    "git_sha": "d66d3e2c7d132efe8bbde0c7e8a072b0f974b085",
+                    "branch": "develop",
+                    "git_sha": "003587a171d6cfa80bc894950d212add9f206f88",
                     "installed_by": ["modules"]
                 },
-                "netmhcpan": {
+                "netmhc3": {
                     "branch": "develop",
+                    "git_sha": "9baf08c136733d9e4c0b16c169b7eebfb98d3291",
+                    "installed_by": ["modules", "netmhcstabandpan"]
+                },
+                "netmhcpan": {
+                    "branch": "main",
                     "git_sha": "503abeb67260f060d8228221b07d743aa4180345",
+                    "installed_by": ["modules"]
+                },
+                "netmhcpan4": {
+                    "branch": "develop",
+                    "git_sha": "e64e6e100302131d575881363a9632936499918d",
                     "installed_by": ["modules", "netmhcstabandpan"]
                 },
                 "netmhcstabpan": {
                     "branch": "develop",
-                    "git_sha": "c1a473f8bc08f778269a36ab62d5adf24357225f",
+                    "git_sha": "3c39dd8d3d0a92aa33fda4352749e35fc22d3e87",
                     "installed_by": ["modules", "netmhcstabandpan"]
                 },
                 "phylowgs/createinput": {
                     "branch": "develop",
-                    "git_sha": "b031249dcf4279606c25e626da2a628756e75e8a",
+                    "git_sha": "05e49188ee9407e1b51dfb1a49d8b6133e9276bc",
                     "installed_by": ["phylowgs"]
                 },
                 "phylowgs/multievolve": {
                     "branch": "develop",
-                    "git_sha": "535662d391a3533dea3b11c462c14799227e08b2",
+                    "git_sha": "980c1328a92a6cc547834ccc361c5187d0927944",
                     "installed_by": ["phylowgs"]
                 },
                 "phylowgs/parsecnvs": {
                     "branch": "develop",
-                    "git_sha": "8471691d7c29bc2f5f4fb92279c94fb2640b6c38",
+                    "git_sha": "34505c4c67eabebab927561d3a8fca87c9efe788",
                     "installed_by": ["phylowgs"]
                 },
                 "phylowgs/writeresults": {
                     "branch": "develop",
-                    "git_sha": "6d27f08bf649e8680ace321d3127dcdf0e210973",
+                    "git_sha": "34505c4c67eabebab927561d3a8fca87c9efe788",
                     "installed_by": ["phylowgs"]
                 }
             }
@@ -76,7 +91,7 @@
             },
             "netmhcstabandpan": {
                 "branch": "develop",
-                "git_sha": "d60211568e3709e9284bc06eef938e361d474d08",
+                "git_sha": "307cbffd9077e238176acb5044f9db85784e8aad",
                 "installed_by": ["subworkflows"]
             },
             "phylowgs": {
@@ -92,7 +107,7 @@
         "nf-core": {
             "multiqc": {
                 "branch": "master",
-                "git_sha": "b80f5fd12ff7c43938f424dd76392a2704fa2396",
+                "git_sha": "cf17ca47590cc578dfb47db1c2a44ef86f89976d",
                 "installed_by": ["modules"]
             }
         }
@@ -101,17 +116,17 @@
         "nf-core": {
             "utils_nextflow_pipeline": {
                 "branch": "master",
-                "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa",
+                "git_sha": "3aa0aec1d52d492fe241919f0c6100ebf0074082",
                 "installed_by": ["subworkflows"]
             },
             "utils_nfcore_pipeline": {
                 "branch": "master",
-                "git_sha": "92de218a329bfc9a9033116eb5f65fd270e72ba3",
+                "git_sha": "1b6b9a3338d011367137808b49b923515080e3ba",
                 "installed_by": ["subworkflows"]
             },
-            "utils_nfvalidation_plugin": {
+            "utils_nfschema_plugin": {
                 "branch": "master",
-                "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa",
+                "git_sha": "bbd5a41f4535a8defafe6080e00ea74c45f4f96c",
                 "installed_by": ["subworkflows"]
             }
         }
diff --git a/modules/msk/neoantigenediting/aligntoiedb/environment.yml b/modules/msk/neoantigenediting/aligntoiedb/environment.yml
index fb0fef8..4c59b93 100644
--- a/modules/msk/neoantigenediting/aligntoiedb/environment.yml
+++ b/modules/msk/neoantigenediting/aligntoiedb/environment.yml
@@ -1,9 +1,7 @@
 ---
 # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
-name: "neoantigenediting_aligntoiedb"
 channels:
   - conda-forge
   - bioconda
-  - defaults
 dependencies:
-  - "NEOANTIGENEDITING"
+  - "YOUR-TOOL=HERE"
diff --git a/modules/msk/neoantigenediting/aligntoiedb/meta.yml b/modules/msk/neoantigenediting/aligntoiedb/meta.yml
index 77d6121..f209858 100644
--- a/modules/msk/neoantigenediting/aligntoiedb/meta.yml
+++ b/modules/msk/neoantigenediting/aligntoiedb/meta.yml
@@ -1,4 +1,3 @@
----
 # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
 name: "neoantigenediting_aligntoiedb"
 description: Align neoantigens to the IEDB file
@@ -8,40 +7,43 @@ keywords:
   - IEDB
 tools:
   - "neoantigenediting":
-      description: "Code for computing neoantigen qualities and for performing clone composition predictions."
+      description:
+        "Code for computing neoantigen qualities and for performing clone
+        composition predictions."
       homepage: "https://www.nature.com/articles/s41586-022-04735-9"
       tool_dev_url: "https://github.com/LukszaLab/NeoantigenEditing"
+      identifier: ""
 input:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing sample information
-        e.g. `[ id:'sample1', single_end:false ]`
-  - patient_data:
-      type: file
-      description: Patient data consisting of mutation, neoantigen, and tree information
-      pattern: "*.json"
-  - iedb_fasta:
-      type: file
-      description: IEDB epitopes used for analysis
-      pattern: "*.fasta"
-
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. `[ id:'sample1', single_end:false ]`
+    - patient_data:
+        type: file
+        description: Patient data consisting of mutation, neoantigen, and tree information
+        pattern: "*.json"
+  - - iedb_fasta:
+        type: file
+        description: IEDB epitopes used for analysis
+        pattern: "*.fasta"
 output:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing sample information
-        e.g. `[ id:'sample1', single_end:false ]`
-  - iedb_alignment:
-      type: file
-      description: IEDB alignment file
-      pattern: "iedb_alignments_*.txt"
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'sample1', single_end:false ]`
+  - iedb_alignments_*.txt:
+      type: file
+      description: IEDB alignment file
+      pattern: "iedb_alignments_*.txt"
   - versions:
-      type: file
-      description: File containing software versions
-      pattern: "versions.yml"
-
+  - versions.yml:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
 authors:
   - "@nikhil"
 maintainers:
diff --git a/modules/msk/neoantigenediting/computefitness/environment.yml b/modules/msk/neoantigenediting/computefitness/environment.yml
index d737bc6..4c59b93 100644
--- a/modules/msk/neoantigenediting/computefitness/environment.yml
+++ b/modules/msk/neoantigenediting/computefitness/environment.yml
@@ -1,9 +1,7 @@
 ---
 # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
-name: "neoantigenediting_computefitness"
 channels:
   - conda-forge
   - bioconda
-  - defaults
 dependencies:
-  - "NEOANTIGENEDITING"
+  - "YOUR-TOOL=HERE"
diff --git a/modules/msk/neoantigenediting/computefitness/meta.yml b/modules/msk/neoantigenediting/computefitness/meta.yml
index 0a7febb..e421c76 100644
--- a/modules/msk/neoantigenediting/computefitness/meta.yml
+++ b/modules/msk/neoantigenediting/computefitness/meta.yml
@@ -1,4 +1,3 @@
----
 # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
 name: "neoantigenediting_computefitness"
 description: Compute fitness of the neoantigens
@@ -8,40 +7,44 @@ keywords:
   - fitness
 tools:
   - "neoantigenediting":
-      description: "Code for computing neoantigen qualities and for performing clone composition predictions."
+      description:
+        "Code for computing neoantigen qualities and for performing clone
+        composition predictions."
       homepage: "https://www.nature.com/articles/s41586-022-04735-9"
       tool_dev_url: "https://github.com/LukszaLab/NeoantigenEditing"
+      identifier: ""
 input:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing sample information
-        e.g. `[ id:'sample1', single_end:false ]`
-  - patient_data:
-      type: file
-      description: Patient data consisting of mutation, neoantigen, and tree information
-      pattern: "*.json"
-  - alignment:
-      type: file
-      description: IEDB alignment file
-      pattern: "iedb_alignments_*.txt"
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. `[ id:'sample1', single_end:false ]`
+    - patient_data:
+        type: file
+        description: Patient data consisting of mutation, neoantigen, and tree information
+        pattern: "*.json"
+    - alignment_file:
+        type: file
+        description: IEDB alignment file
+        pattern: "iedb_alignments_*.txt"
 output:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing sample information
-        e.g. `[ id:'sample1', single_end:false ]`
-  - annotated_output:
-      type: file
-      description: Output containing neoantigen quality scores
-      pattern: "*_annotated.json"
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'sample1', single_end:false ]`
+  - "*_annotated.json":
+      type: file
+      description: Output containing neoantigen quality scores
+      pattern: "*_annotated.json"
   - versions:
-      type: file
-      description: File containing software versions
-      pattern: "versions.yml"
-
+  - versions.yml:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
 authors:
   - "@nikhil"
 maintainers:
diff --git a/modules/msk/neoantigenediting/computefitness/resources/usr/bin/EpitopeDistance.py b/modules/msk/neoantigenediting/computefitness/resources/usr/bin/EpitopeDistance.py
index f278817..ea09073 100755
--- a/modules/msk/neoantigenediting/computefitness/resources/usr/bin/EpitopeDistance.py
+++ b/modules/msk/neoantigenediting/computefitness/resources/usr/bin/EpitopeDistance.py
@@ -10,7 +10,7 @@
 import json
 import os
 
-
+#%
 class EpitopeDistance(object):
     """Base class for epitope crossreactivity.
diff --git a/modules/msk/neoantigenutils/convertannotjson/environment.yml b/modules/msk/neoantigenutils/convertannotjson/environment.yml
new file mode 100644
index 0000000..ffc15a2
--- /dev/null
+++ b/modules/msk/neoantigenutils/convertannotjson/environment.yml
@@ -0,0 +1,6 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - "YOUR-TOOL=HERE"
diff --git a/modules/msk/neoantigenutils/convertannotjson/main.nf b/modules/msk/neoantigenutils/convertannotjson/main.nf
new file mode 100644
index 0000000..4aa6b9f
--- /dev/null
+++ b/modules/msk/neoantigenutils/convertannotjson/main.nf
@@ -0,0 +1,44 @@
+process NEOANTIGENUTILS_CONVERTANNOTJSON {
+    tag "$meta.id"
+    label 'process_single'
+
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'docker://mskcc/neoantigen-utils-base:1.0.0':
+        'docker.io/mskcc/neoantigen-utils-base:1.0.0' }"
+
+    input:
+    tuple val(meta), path(annotatedJSON)
+
+    output:
+    tuple val(meta), path("*.tsv"), emit: neoantigenTSV
+    path "versions.yml", emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    convertannotjson.py \
+        --json_file ${annotatedJSON} \
+        --output_file ${prefix}_neoantigens.tsv
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        convertannotjson: \$(echo \$(convertannotjson.py -v))
+    END_VERSIONS
+    """
+
+    stub:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+
+    touch ${prefix}_neoantigens.tsv
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        convertannotjson: \$(echo \$(convertannotjson.py -v))
+    END_VERSIONS
+    """
+}
diff --git a/modules/msk/neoantigenutils/convertannotjson/meta.yml b/modules/msk/neoantigenutils/convertannotjson/meta.yml
new file mode 100644
index 0000000..5c77149
--- /dev/null
+++ b/modules/msk/neoantigenutils/convertannotjson/meta.yml
@@ -0,0 +1,47 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "neoantigenutils_convertannotjson"
+description:
+  Takes the output of the neoantigen editing subworkflow and converts the
+  annotated neoantigens to tsv format.
+keywords:
+  - neoantigen
+  - tsv
+  - peptides
+tools:
+  - neoantigen_utils:
+      description: "Collection of helper scripts for neoantigen processing"
+      documentation: "https://github.com/mskcc-omics-workflows/modules"
+      licence: [""]
+      identifier: ""
+
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information.
+          e.g. `[ id:'sample1']`
+    - annotatedJSON:
+        type: file
+        description: Json annotated by the neoantigenediting subworkflow
+        pattern: "*annotated.json"
+output:
+  - neoantigenTSV:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1']`
+      - "*.tsv":
+          type: file
+          description: A reformatted file of neoantigens, now in TSV format!
+          pattern: "*.{tsv}"
+  - versions:
+      - versions.yml:
+          type: file
+          description: File containing software versions
+          pattern: "versions.yml"
+authors:
+  - "@johnoooh"
+maintainers:
+  - "@johnoooh"
+  - "@nikhil"
diff --git a/modules/msk/neoantigenutils/convertannotjson/resources/usr/bin/convertannotjson.py b/modules/msk/neoantigenutils/convertannotjson/resources/usr/bin/convertannotjson.py
new file mode 100755
index 0000000..60c9d8a
--- /dev/null
+++ b/modules/msk/neoantigenutils/convertannotjson/resources/usr/bin/convertannotjson.py
@@ -0,0 +1,40 @@
+#!/usr/bin/env python3
+
+import json
+import argparse
+
+VERSION = 1.0
+
+def process_json_file(json_file_path, output_file_path):
+    with open(json_file_path, "r") as json_file:
+        data = json.load(json_file)
+
+    # Define the TSV header
+    tsv_header = ["id", "mutation_id", "HLA_gene_id", "sequence", "WT_sequence", "mutated_position", "Kd", "KdWT", "R", "logC", "logA", "quality", "git_branch"]
+
+    # Convert JSON to TSV
+    tsv_lines = []
+    tsv_lines.append("\t".join(tsv_header))
+
+    for neoantigen in data["neoantigens"]:
+        tsv_lines.append("\t".join(str(neoantigen.get(field, "")) for field in tsv_header[:-1]))
+
+    tsv_output = "\n".join(tsv_lines)
+
+    # Write the TSV output to a file
+    with open(output_file_path, "w") as tsv_file:
+        tsv_file.write(tsv_output)
+
+def main():
+    parser = argparse.ArgumentParser(description="Process an annotated JSON file and output TSV format.")
+    parser.add_argument("--json_file", help="Path to the annotated JSON file")
+    parser.add_argument("--output_file", help="Path to the output TSV file")
+    parser.add_argument(
+        "-v", "--version", action="version", version="v{}".format(VERSION)
+    )
+    args = parser.parse_args()
+
+    process_json_file(args.json_file, args.output_file)
+
+if __name__ == "__main__":
+    main()
diff --git a/modules/msk/neoantigenutils/convertannotjson/tests/main.nf.test b/modules/msk/neoantigenutils/convertannotjson/tests/main.nf.test
new file mode 100644
index 0000000..74d99d0
--- /dev/null
+++ b/modules/msk/neoantigenutils/convertannotjson/tests/main.nf.test
@@ -0,0 +1,62 @@
+nextflow_process {
+
+    name "Test Process NEOANTIGENUTILS_CONVERTANNOTJSON"
+    script "../main.nf"
+    process "NEOANTIGENUTILS_CONVERTANNOTJSON"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "neoantigenutils"
+    tag "neoantigenutils/convertannotjson"
+    tag "modules_msk"
+
+    test("neoantigenutils_convertannotjson - output(test) - tsv") {
+
+        when {
+
+            process {
+                """
+
+                input[0] = [
+                    [ id:'test'], // meta map
+                    file(params.test_data_mskcc['neoantigen']['test_annotated'], checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match()}
+            )
+        }
+
+    }
+
+    test("neoantigenutils_convertannotjson - output(test) - tsv - stub") {
+
+        options "-stub"
+
+        when {
+
+            process {
+                """
+
+                input[0] = [
+                    [ id:'test'], // meta map
+                    file('test_annotated')
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+}
diff --git a/modules/msk/neoantigenutils/convertannotjson/tests/main.nf.test.snap b/modules/msk/neoantigenutils/convertannotjson/tests/main.nf.test.snap
new file mode 100644
index 0000000..061ab9d
--- /dev/null
+++ b/modules/msk/neoantigenutils/convertannotjson/tests/main.nf.test.snap
@@ -0,0 +1,68 @@
+{
+    "neoantigenutils_convertannotjson - output(test) - tsv": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test_neoantigens.tsv:md5,4931fb72bba8bbeb3bc6cef19b99b01a"
+                    ]
+                ],
"1": [ + "versions.yml:md5,e36ea44c5cc6130f40b1a100f7e84fb1" + ], + "neoantigenTSV": [ + [ + { + "id": "test" + }, + "test_neoantigens.tsv:md5,4931fb72bba8bbeb3bc6cef19b99b01a" + ] + ], + "versions": [ + "versions.yml:md5,e36ea44c5cc6130f40b1a100f7e84fb1" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-24T16:49:37.133693" + }, + "neoantigenutils_convertannotjson - output(test) - tsv - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_neoantigens.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,e36ea44c5cc6130f40b1a100f7e84fb1" + ], + "neoantigenTSV": [ + [ + { + "id": "test" + }, + "test_neoantigens.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e36ea44c5cc6130f40b1a100f7e84fb1" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-24T16:49:41.629164" + } +} \ No newline at end of file diff --git a/modules/msk/neoantigenutils/convertannotjson/tests/tags.yml b/modules/msk/neoantigenutils/convertannotjson/tests/tags.yml new file mode 100644 index 0000000..70333d7 --- /dev/null +++ b/modules/msk/neoantigenutils/convertannotjson/tests/tags.yml @@ -0,0 +1,2 @@ +neoantigenutils/convertannotjson: + - "modules/msk/neoantigenutils/convertannotjson/**" diff --git a/modules/msk/neoantigenutils/formatnetmhcpan/environment.yml b/modules/msk/neoantigenutils/formatnetmhcpan/environment.yml index 4f886c1..4c59b93 100644 --- a/modules/msk/neoantigenutils/formatnetmhcpan/environment.yml +++ b/modules/msk/neoantigenutils/formatnetmhcpan/environment.yml @@ -1,9 +1,7 @@ --- # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json -name: "neoantigenutils_formatnetmhcpan" channels: - conda-forge - bioconda - - defaults dependencies: - - "NEOANTIGENUTILS" + - "YOUR-TOOL=HERE" diff --git a/modules/msk/neoantigenutils/formatnetmhcpan/main.nf b/modules/msk/neoantigenutils/formatnetmhcpan/main.nf index d6d67d1..b113c2b 100644 --- a/modules/msk/neoantigenutils/formatnetmhcpan/main.nf +++ b/modules/msk/neoantigenutils/formatnetmhcpan/main.nf @@ -20,10 +20,13 @@ process NEOANTIGENUTILS_FORMATNETMHCPAN { def prefix = task.ext.prefix ?: "${meta.id}" def netmhcOutputType = meta.typeMut ? "--type_MUT": "" def netmhcOutputFrom = meta.fromStab ? "--from_STAB": "" + def netmhcOutputPan = meta.fromPan ? 
"": "--from_NETMHC3" + """ format_netmhcpan_output.py \ --netMHCpan_output ${netmhcPanOutput} \ --id ${prefix} \ + ${netmhcOutputPan} \ ${netmhcOutputType} \ ${netmhcOutputFrom} diff --git a/modules/msk/neoantigenutils/formatnetmhcpan/meta.yml b/modules/msk/neoantigenutils/formatnetmhcpan/meta.yml index b7776bc..d9ab139 100644 --- a/modules/msk/neoantigenutils/formatnetmhcpan/meta.yml +++ b/modules/msk/neoantigenutils/formatnetmhcpan/meta.yml @@ -1,7 +1,8 @@ ---- # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: "neoantigenutils_formatnetmhcpan" -description: Takes the standard out of netmhcpan tools and converts them to a tsv for downstream processing +description: + Takes the standard out of netmhcpan tools and converts them to a tsv + for downstream processing keywords: - neoantigen - tsv @@ -12,37 +13,40 @@ tools: description: "Collection of helper scripts for neoantigen processing" documentation: "https://github.com/mskcc-omics-workflows/modules" licence: [""] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information. - typeMut indicated if a mutated fasta was used - fromStab indicates if the output was from netmhcstabpan - e.g. `[ id:'sample1', typeMut: false, fromStab: false ]` - - - netmhcOutput: - type: file - description: Maf outputtted by Tempo that was run through phyloWGS - pattern: "*.{output}" - + - - meta: + type: map + description: | + Groovy Map containing sample information. + typeMut indicated if a mutated fasta was used + fromStab indicates if the output was from netmhcstabpan + e.g. `[ id:'sample1', typeMut: false, fromStab: false ]` + - netmhcPanOutput: + type: file + description: + STDOUT file of netMHCstabpan or netMHC runs for MUT and WT. A poorly formated + file of neoantigens. + pattern: "*.WT.*.output,*.MUT.*.output" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ]` - - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - netMHCpanreformatted: - type: file - description: A reformatted file of neoantigens and their binding affinities output by netmhcpan or netmhcstabpan. This contains the wild type antigens - pattern: "*.{tsv}" - + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "*.tsv": + type: file + description: + A reformatted file of neoantigens and their binding affinities + output by netmhcpan or netmhcstabpan. 
This contains the wild type antigens + pattern: "*.{tsv}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@johnoooh" - "@nikhil" diff --git a/modules/msk/neoantigenutils/formatnetmhcpan/resources/usr/bin/format_netmhcpan_output.py b/modules/msk/neoantigenutils/formatnetmhcpan/resources/usr/bin/format_netmhcpan_output.py index a11be09..2bce405 100755 --- a/modules/msk/neoantigenutils/formatnetmhcpan/resources/usr/bin/format_netmhcpan_output.py +++ b/modules/msk/neoantigenutils/formatnetmhcpan/resources/usr/bin/format_netmhcpan_output.py @@ -31,18 +31,24 @@ "Thalf(h)", "%Rank_Stab", ] +NETMHC3_HEADER = ["pos", "peptide", "score_el", "affinity", "Identity", "MHC"] -def netMHCpan_out_reformat(netMHCpanoutput, mut, stab, prefix): +def netMHCpan_out_reformat(netMHCoutput, mut, stab, netmhc3, prefix): file_li = [] stab_prefix = "" type_prefix = "WT" + pan_prefix = "pan" if stab: stab_prefix = "stab" if mut: type_prefix = "MUT" - outfilename = "{}_netmHC{}panoutput.{}.tsv".format(prefix, stab_prefix, type_prefix) - with open(netMHCpanoutput, "r") as file: + if netmhc3: + pan_prefix = "" + outfilename = "{}_netmhc{}{}.output.{}.tsv".format( + prefix, pan_prefix, stab_prefix, type_prefix + ) + with open(netMHCoutput, "r") as file: # data = file.read() for line in file: # Remove leading whitespace @@ -53,13 +59,19 @@ def netMHCpan_out_reformat(netMHCpanoutput, mut, stab, prefix): elif line[0].isdigit(): # Print or process the line as needed match = ( - line.strip().replace(" <= WB", "").replace(" <= SB", "") + line.strip() + .replace(" <= WB", "") + .replace(" <= SB", "") + .replace(" WB ", " ") + .replace(" SB ", " ") ) # strip to remove leading/trailing whitespace splititem = match.split() tab_separated_line = "\t".join(splititem) file_li.append(tab_separated_line) if stab: header = "\t".join(STAB_PAN_HEADER) + "\n" + elif netmhc3: + header = "\t".join(NETMHC3_HEADER) + "\n" else: header = "\t".join(PAN_HEADER) + "\n" with open(outfilename, "w") as file: @@ -80,6 +92,11 @@ def parse_args(): action="store_true", help="Output is from netmhcstab", ) + parser.add_argument( + "--from_NETMHC3", + action="store_true", + help="Output is from the older netmhc version 3.4", + ) parser.add_argument("--id", required=True, help="Prefix to label the output") parser.add_argument( "-v", "--version", action="version", version="%(prog)s {}".format(VERSION) @@ -90,7 +107,7 @@ def parse_args(): def main(args): netMHCpan_out_reformat( - args.netMHCpan_output, args.type_MUT, args.from_STAB, args.id + args.netMHCpan_output, args.type_MUT, args.from_STAB, args.from_NETMHC3, args.id ) diff --git a/modules/msk/neoantigenutils/formatnetmhcpan/tests/main.nf.test b/modules/msk/neoantigenutils/formatnetmhcpan/tests/main.nf.test index 93e87c7..9e2206f 100644 --- a/modules/msk/neoantigenutils/formatnetmhcpan/tests/main.nf.test +++ b/modules/msk/neoantigenutils/formatnetmhcpan/tests/main.nf.test @@ -18,7 +18,7 @@ nextflow_process { """ input[0] = [ - [ id:'test', typeMut: true, fromStab: false ], // meta map + [ id:'test', typeMut: true, fromStab: false, fromPan: true ], // meta map file(params.test_data_mskcc['neoantigen']['MUTnetMHCpan'], checkIfExists: true) ] """ @@ -42,7 +42,55 @@ nextflow_process { """ input[0] = [ - [ id:'test', typeMut: false, fromStab: false], // meta map + [ id:'test', typeMut: false, fromStab: false, fromPan: true], // meta map + file(params.test_data_mskcc['neoantigen']['WTnetMHCpan'], checkIfExists: true) + ] + """ + } 
+ } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("neoantigenutils_formatnetmhcpan - output(MUT,netmhc) - tsv") { + + when { + + process { + """ + + input[0] = [ + [ id:'test', typeMut: true, fromStab: false, fromPan: false ], // meta map + file(params.test_data_mskcc['neoantigen']['MUTnetMHCpan'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("neoantigenutils_formatnetmhcpan - output(WT,netmhc) - tsv") { + + when { + + process { + """ + + input[0] = [ + [ id:'test', typeMut: false, fromStab: false, fromPan: false ], // meta map file(params.test_data_mskcc['neoantigen']['WTnetMHCpan'], checkIfExists: true) ] """ @@ -66,7 +114,7 @@ nextflow_process { """ input[0] = [ - [ id:'test', typeMut: true, fromStab: true ], // meta map + [ id:'test', typeMut: true, fromStab: true, fromPan: true ], // meta map file(params.test_data_mskcc['neoantigen']['MUTnetMHCpanstab'], checkIfExists: true) ] """ @@ -90,7 +138,7 @@ nextflow_process { """ input[0] = [ - [ id:'test', typeMut: false, fromStab: false ], // meta map + [ id:'test', typeMut: false, fromStab: false, fromPan: true ], // meta map file(params.test_data_mskcc['neoantigen']['WTnetMHCpanstab'], checkIfExists: true) ] """ @@ -115,7 +163,7 @@ nextflow_process { """ input[0] = [ - [ id:'test', typeMut: true, fromStab: false ], // meta map + [ id:'test', typeMut: true, fromStab: false, fromPan: true ], // meta map file('MUTnetMHCpan') ] """ diff --git a/modules/msk/neoantigenutils/formatnetmhcpan/tests/main.nf.test.snap b/modules/msk/neoantigenutils/formatnetmhcpan/tests/main.nf.test.snap index 685825a..f821cd3 100644 --- a/modules/msk/neoantigenutils/formatnetmhcpan/tests/main.nf.test.snap +++ b/modules/msk/neoantigenutils/formatnetmhcpan/tests/main.nf.test.snap @@ -7,9 +7,10 @@ { "id": "test", "typeMut": true, - "fromStab": false + "fromStab": false, + "fromPan": true }, - "test_netmHCpanoutput.MUT.tsv:md5,7f00f2df190fe801700b626b72dfdb99" + "test_netmhcpan.output.MUT.tsv:md5,26f40dde35f53f5ced8a251f59dcc77f" ] ], "1": [ @@ -20,9 +21,10 @@ { "id": "test", "typeMut": true, - "fromStab": false + "fromStab": false, + "fromPan": true }, - "test_netmHCpanoutput.MUT.tsv:md5,7f00f2df190fe801700b626b72dfdb99" + "test_netmhcpan.output.MUT.tsv:md5,26f40dde35f53f5ced8a251f59dcc77f" ] ], "versions": [ @@ -30,7 +32,50 @@ ] } ], - "timestamp": "2024-07-30T13:46:27.878268" + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-22T17:15:16.841503839" + }, + "neoantigenutils_formatnetmhcpan - output(WT,netmhc) - tsv": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "typeMut": false, + "fromStab": false, + "fromPan": false + }, + "test_netmhc.output.WT.tsv:md5,36ec8de972a796cb937e8590a2eed2ec" + ] + ], + "1": [ + "versions.yml:md5,2c02f5f3103ee1532c27f7f3b873a578" + ], + "netMHCpanreformatted": [ + [ + { + "id": "test", + "typeMut": false, + "fromStab": false, + "fromPan": false + }, + "test_netmhc.output.WT.tsv:md5,36ec8de972a796cb937e8590a2eed2ec" + ] + ], + "versions": [ + "versions.yml:md5,2c02f5f3103ee1532c27f7f3b873a578" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-22T17:15:34.581815855" }, "neoantigenutils_formatnetmhcpan - output(MUT,netmhcpan) - tsv - stub": { "content": [ @@ -40,7 +85,8 @@ { "id": "test", "typeMut": true, - "fromStab": false + "fromStab": false, + "fromPan": true 
}, "test.MUT.PAN.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" ] @@ -53,7 +99,8 @@ { "id": "test", "typeMut": true, - "fromStab": false + "fromStab": false, + "fromPan": true }, "test.MUT.PAN.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" ] @@ -63,7 +110,50 @@ ] } ], - "timestamp": "2024-07-30T13:47:05.72509" + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-22T17:15:51.298318927" + }, + "neoantigenutils_formatnetmhcpan - output(MUT,netmhc) - tsv": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "typeMut": true, + "fromStab": false, + "fromPan": false + }, + "test_netmhc.output.MUT.tsv:md5,5c1d32a4fb53a0d959f7d50bfc2c9fc0" + ] + ], + "1": [ + "versions.yml:md5,2c02f5f3103ee1532c27f7f3b873a578" + ], + "netMHCpanreformatted": [ + [ + { + "id": "test", + "typeMut": true, + "fromStab": false, + "fromPan": false + }, + "test_netmhc.output.MUT.tsv:md5,5c1d32a4fb53a0d959f7d50bfc2c9fc0" + ] + ], + "versions": [ + "versions.yml:md5,2c02f5f3103ee1532c27f7f3b873a578" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-22T17:15:28.833554691" }, "neoantigenutils_formatnetmhcpan - output(WT,netmhcpan) - tsv": { "content": [ @@ -73,9 +163,10 @@ { "id": "test", "typeMut": false, - "fromStab": false + "fromStab": false, + "fromPan": true }, - "test_netmHCpanoutput.WT.tsv:md5,a1d7db1b6f116e96457f2fa60660558e" + "test_netmhcpan.output.WT.tsv:md5,0ef95cf63ee0b8faa8e9d85413e48943" ] ], "1": [ @@ -86,9 +177,10 @@ { "id": "test", "typeMut": false, - "fromStab": false + "fromStab": false, + "fromPan": true }, - "test_netmHCpanoutput.WT.tsv:md5,a1d7db1b6f116e96457f2fa60660558e" + "test_netmhcpan.output.WT.tsv:md5,0ef95cf63ee0b8faa8e9d85413e48943" ] ], "versions": [ @@ -96,7 +188,11 @@ ] } ], - "timestamp": "2024-07-30T13:46:37.183992" + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-22T17:15:23.00992256" }, "neoantigenutils_formatnetmhcpan - output(MUT,netmhcpanstab) - tsv": { "content": [ @@ -106,9 +202,10 @@ { "id": "test", "typeMut": true, - "fromStab": true + "fromStab": true, + "fromPan": true }, - "test_netmHCstabpanoutput.MUT.tsv:md5,246eb723691371ad49bd080071475740" + "test_netmhcpanstab.output.MUT.tsv:md5,246eb723691371ad49bd080071475740" ] ], "1": [ @@ -119,9 +216,10 @@ { "id": "test", "typeMut": true, - "fromStab": true + "fromStab": true, + "fromPan": true }, - "test_netmHCstabpanoutput.MUT.tsv:md5,246eb723691371ad49bd080071475740" + "test_netmhcpanstab.output.MUT.tsv:md5,246eb723691371ad49bd080071475740" ] ], "versions": [ @@ -129,7 +227,11 @@ ] } ], - "timestamp": "2024-07-30T13:46:47.110076" + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-22T17:15:40.122191248" }, "neoantigenutils_formatnetmhcpan - output(WT,netmhcpanstab) - tsv": { "content": [ @@ -139,9 +241,10 @@ { "id": "test", "typeMut": false, - "fromStab": false + "fromStab": false, + "fromPan": true }, - "test_netmHCpanoutput.WT.tsv:md5,b95a6624d4010eb6517ca880a13e670d" + "test_netmhcpan.output.WT.tsv:md5,b95a6624d4010eb6517ca880a13e670d" ] ], "1": [ @@ -152,9 +255,10 @@ { "id": "test", "typeMut": false, - "fromStab": false + "fromStab": false, + "fromPan": true }, - "test_netmHCpanoutput.WT.tsv:md5,b95a6624d4010eb6517ca880a13e670d" + "test_netmhcpan.output.WT.tsv:md5,b95a6624d4010eb6517ca880a13e670d" ] ], "versions": [ @@ -162,6 +266,10 @@ ] } ], - "timestamp": "2024-07-30T13:46:56.841519" + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": 
"2024-11-22T17:15:45.911170919" } } \ No newline at end of file diff --git a/modules/msk/neoantigenutils/generatehlastring/environment.yml b/modules/msk/neoantigenutils/generatehlastring/environment.yml index 21ef0ea..4c59b93 100644 --- a/modules/msk/neoantigenutils/generatehlastring/environment.yml +++ b/modules/msk/neoantigenutils/generatehlastring/environment.yml @@ -1,9 +1,7 @@ --- # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json -name: "neoantigenutils_generatehlastring" channels: - conda-forge - bioconda - - defaults dependencies: - - "NEOANTIGENUTILS" + - "YOUR-TOOL=HERE" diff --git a/modules/msk/neoantigenutils/generatehlastring/meta.yml b/modules/msk/neoantigenutils/generatehlastring/meta.yml index 08709c2..c6fca8e 100644 --- a/modules/msk/neoantigenutils/generatehlastring/meta.yml +++ b/modules/msk/neoantigenutils/generatehlastring/meta.yml @@ -1,4 +1,3 @@ ---- # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: "neoantigenutils_generatehlastring" description: Generate the hla string for netmhc tools @@ -12,35 +11,30 @@ tools: description: "Collection of helper scripts for neoantigen processing" documentation: "https://github.com/mskcc-omics-workflows/modules" licence: [""] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information. - e.g. `[ id:'sample1', single_end:false ]` - - - inputHLA: - type: file - description: Winners HLA file from polysolver - pattern: "*.{hla.txt}" - + - - meta: + type: map + description: | + Groovy Map containing sample information. + e.g. `[ id:'sample1', single_end:false ]` + - inputHLA: + type: file + description: Winners HLA file from polysolver + pattern: "*.{hla.txt}" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ]` - - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - hlastring: - type: string - description: HLA string to use for netmhc tool input - + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]`
+  - versions:
+      - versions.yml:
+          type: file
+          description: File containing software versions
+          pattern: "versions.yml"
 authors:
   - "@johnoooh"
   - "@nikhil"
diff --git a/modules/msk/neoantigenutils/generatemutfasta/environment.yml b/modules/msk/neoantigenutils/generatemutfasta/environment.yml
index 3ab33c4..4c59b93 100644
--- a/modules/msk/neoantigenutils/generatemutfasta/environment.yml
+++ b/modules/msk/neoantigenutils/generatemutfasta/environment.yml
@@ -1,9 +1,7 @@
 ---
 # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
-name: "neoantigenutils_generatemutfasta"
 channels:
   - conda-forge
   - bioconda
-  - defaults
 dependencies:
-  - "NEOANTIGENUTILS"
+  - "YOUR-TOOL=HERE"
diff --git a/modules/msk/neoantigenutils/generatemutfasta/meta.yml b/modules/msk/neoantigenutils/generatemutfasta/meta.yml
index 5d319c9..ba57423 100644
--- a/modules/msk/neoantigenutils/generatemutfasta/meta.yml
+++ b/modules/msk/neoantigenutils/generatemutfasta/meta.yml
@@ -1,4 +1,3 @@
----
 # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
 name: "neoantigenutils_generatemutfasta"
 description: Generate the mutation fasta for netmhc tools
@@ -12,55 +11,52 @@ tools:
       description: "Collection of helper scripts for neoantigen processing"
       documentation: "https://github.com/mskcc-omics-workflows/modules"
       licence: [""]
+      identifier: ""
 
 input:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing sample information.
-        e.g. `[ id:'sample1', single_end:false ]`
-
-  - inputMaf:
-      type: file
-      description: Maf outputtted by Tempo that was run through phyloWGS
-      pattern: "*.{maf}"
-
-  - cds:
-      type: file
-      description: coding sequence resource fasta
-      pattern: "*.{cds.all.fa.gz}"
-
-  - cdna:
-      type: file
-      description: cDNA resource fasta
-      pattern: "*.{cdna.all.fa.gz}"
-
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information.
+          e.g. `[ id:'sample1', single_end:false ]`
+    - inputMaf:
+        type: file
+        description: Maf outputted by Tempo that was run through phyloWGS
+        pattern: "*.{maf}"
+  - - cds:
+        type: file
+        description: coding sequence resource fasta
+        pattern: "*.{cds.all.fa.gz}"
+    - cdna:
+        type: file
+        description: cDNA resource fasta
+        pattern: "*.{cdna.all.fa.gz}"
 output:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing sample information
-        e.g. `[ id:'sample1', single_end:false ]`
-
-  - versions:
-      type: file
-      description: File containing software versions
-      pattern: "versions.yml"
-  - json:
-      type: file
-      description: output combined Json ready for input into the neoantigen pipeline
-      pattern: "*.{json}"
-  - mut_fasta:
-      type: file
-      description: Mutated fasta sequence
-      pattern: "*.{MUT_sequences.fa}"
-
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'sample1', single_end:false ]`
+  - "*_out/*.MUT_sequences.fa":
+      type: file
+      description: Mutated fasta sequence
+      pattern: "*.{MUT_sequences.fa}"
   - wt_fasta:
-      type: file
-      description: Wildtype fasta sequence
-      pattern: "*.{WT_sequences.fa}"
-
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g.
`[ id:'sample1', single_end:false ]` + - "*_out/*.WT_sequences.fa": + type: file + description: Wildtype fasta sequence + pattern: "*.{WT_sequences.fa}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@johnoooh" - "@nikhil" diff --git a/modules/msk/neoantigenutils/generatemutfasta/resources/usr/bin/generateMutFasta.py b/modules/msk/neoantigenutils/generatemutfasta/resources/usr/bin/generateMutFasta.py index 84d23fc..253a41b 100755 --- a/modules/msk/neoantigenutils/generatemutfasta/resources/usr/bin/generateMutFasta.py +++ b/modules/msk/neoantigenutils/generatemutfasta/resources/usr/bin/generateMutFasta.py @@ -381,24 +381,6 @@ def __init__(self, maf_row, cds_seq, cdna_seq): encoded_position = encoded_start + ALPHABET[sum_remaining % 26] + encoded_end - if self.maf_row["Tumor_Seq_Allele2"] == "-": - # handles deletion - if len(self.maf_row["Reference_Allele"]) > 3: - Allele2code = self.maf_row["Reference_Allele"][0:3] - else: - Allele2code = self.maf_row["Reference_Allele"] - - elif len(self.maf_row["Tumor_Seq_Allele2"]) > 1: - # handles INS and DNP - if len(self.maf_row["Tumor_Seq_Allele2"]) > 3: - Allele2code = self.maf_row["Tumor_Seq_Allele2"][0:3] - else: - Allele2code = self.maf_row["Tumor_Seq_Allele2"] - - else: - # SNPs - Allele2code = self.maf_row["Tumor_Seq_Allele2"] - if self.maf_row["Tumor_Seq_Allele2"] == "-": # handles deletion if len(self.maf_row["Reference_Allele"]) > 3: @@ -434,7 +416,6 @@ def __init__(self, maf_row, cds_seq, cdna_seq): + "SY" + Allele2code ) - print(self.identifier_key) ### Check if the variant_classification is among those that can generate a neoantigen def is_non_syn(self): diff --git a/modules/msk/neoantigenutils/generatemutfasta/tests/main.nf.test.snap b/modules/msk/neoantigenutils/generatemutfasta/tests/main.nf.test.snap index 6f36db5..132e856 100644 --- a/modules/msk/neoantigenutils/generatemutfasta/tests/main.nf.test.snap +++ b/modules/msk/neoantigenutils/generatemutfasta/tests/main.nf.test.snap @@ -47,10 +47,10 @@ } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.2", + "nextflow": "24.10.1" }, - "timestamp": "2024-06-11T15:34:00.546613" + "timestamp": "2024-11-22T17:16:33.363826904" }, "neoantigenutils_generatemutfasta - maf - fasta - stub": { "content": [ @@ -100,9 +100,9 @@ } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.2", + "nextflow": "24.10.1" }, - "timestamp": "2024-06-11T15:34:11.407096" + "timestamp": "2024-11-22T17:16:40.580416341" } } \ No newline at end of file diff --git a/modules/msk/neoantigenutils/neoantigeninput/environment.yml b/modules/msk/neoantigenutils/neoantigeninput/environment.yml index 5cc66dd..4c59b93 100644 --- a/modules/msk/neoantigenutils/neoantigeninput/environment.yml +++ b/modules/msk/neoantigenutils/neoantigeninput/environment.yml @@ -1,9 +1,7 @@ --- # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json -name: "neoantigenutils_neoantigeninput" channels: - conda-forge - bioconda - - defaults dependencies: - - "NEOANTIGENUTILS" + - "YOUR-TOOL=HERE" diff --git a/modules/msk/neoantigenutils/neoantigeninput/main.nf b/modules/msk/neoantigenutils/neoantigeninput/main.nf index 747b1e5..90147b3 100644 --- a/modules/msk/neoantigenutils/neoantigeninput/main.nf +++ b/modules/msk/neoantigenutils/neoantigeninput/main.nf @@ -6,7 +6,7 @@ process NEOANTIGENUTILS_NEOANTIGENINPUT { 'docker.io/mskcc/neoantigen-utils-base:1.1.0' 
}" input: - tuple val(meta), path(inputMaf), path(hlaFile) + tuple val(meta), path(inputMaf), path(inputBedpe, arity: '0..*'), path(hlaFile) tuple val(meta2), path(phyloWGSsumm), path(phyloWGSmut), path(phyloWGSfolder) tuple val(meta3), path(mutNetMHCpan), path(wtNetMHCpan) @@ -22,6 +22,7 @@ process NEOANTIGENUTILS_NEOANTIGENINPUT { def id = task.ext.prefix ?: "${meta.id}" def patientid = task.ext.cohort ?: "${meta.id}_patient" def cohort = task.ext.cohort ?: "${meta.id}_cohort" + def bedpe = inputBedpe ? "--bedpe_file ${inputBedpe}": "" """ tree_folder_name=\$(basename -s .zip "${phyloWGSfolder}") @@ -33,16 +34,16 @@ process NEOANTIGENUTILS_NEOANTIGENINPUT { generate_input.py --maf_file ${inputMaf} \ + ${bedpe} \ --summary_file ${id}.summ.json \ --mutation_file ${id}.mut.json \ --tree_directory \$tree_folder_name \ --id ${id} --patient_id ${patientid} \ --cohort ${cohort} --HLA_genes ${hlaFile} \ --netMHCpan_MUT_input ${mutNetMHCpan} \ - --netMHCpan_WT_input ${wtNetMHCpan} + --netMHCpan_WT_input ${wtNetMHCpan} \ ${args} - cat <<-END_VERSIONS > versions.yml "${task.process}": neoantigeninput: \$(echo \$(generate_input.py -v)) diff --git a/modules/msk/neoantigenutils/neoantigeninput/meta.yml b/modules/msk/neoantigenutils/neoantigeninput/meta.yml index 8f22679..6e25bd9 100644 --- a/modules/msk/neoantigenutils/neoantigeninput/meta.yml +++ b/modules/msk/neoantigenutils/neoantigeninput/meta.yml @@ -1,7 +1,8 @@ ---- # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: "neoantigenutils_neoantigeninput" -description: This module take several inputs to the Lukza neoantigen pipeline and combines them into a single json file ready for input into their pipeline +description: + This module take several inputs to the Lukza neoantigen pipeline and + combines them into a single json file ready for input into their pipeline keywords: - neoantigen - aggregate @@ -11,65 +12,76 @@ tools: description: "Collection of helper scripts for neoantigen processing" documentation: "https://github.com/mskcc-omics-workflows/modules" licence: [""] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information. Maybe cohort and patient_id as well? - e.g. `[ id:'sample1', single_end:false ]` - - - inputMaf: - type: file - description: Maf outputtted by Tempo that was run through phyloWGS - pattern: "*.{maf}" - - - phyloWGSsumm: - type: file - description: Summ json outputtted by phyloWGS - pattern: "*.{json.gz}" - - - phyloWGSmut: - type: file - description: Summary json outputtted by phyloWGS - pattern: "*.{json.gz}" - - - phyloWGSfolder: - type: file - description: Folder of mutations in trees output by PhyloWGS - pattern: ".{zip}" - - - mutNetMHCpan: - type: file - description: tsv formatted output from netMHCpan with the mutated neoantigens . - pattern: ".{tsv}" - - - wtNetMHCpan: - type: file - description: tsv formatted STDOUT file of netMHCpan. A poorly formated file of neoantigens. This containes the wild type antigens - pattern: ".{tsv}" - - - hlaFile: - type: file - description: HLA tsv outputtted by Polysolver - pattern: "winners.{tsv}" - + - - meta: + type: map + description: | + Groovy Map containing sample information. Maybe cohort and patient_id as well? + e.g. 
`[ id:'sample1', single_end:false ]`
+    - inputMaf:
+        type: file
+        description: Maf outputted by Tempo that was run through phyloWGS
+        pattern: "*.{maf}"
+    - inputBedpe:
+        type: file
+        description: bedpe file containing SVs
+        pattern: "*.{bedpe}"
+    - hlaFile:
+        type: file
+        description: HLA tsv outputted by Polysolver
+        pattern: "winners.{tsv}"
+  - - meta2:
+        type: map
+        description: |
+          Groovy Map containing sample information; may also contain cohort and patient_id.
+          e.g. `[ id:'sample1', single_end:false ]`
+    - phyloWGSsumm:
+        type: file
+        description: Summary json outputted by phyloWGS
+        pattern: "*.{json.gz}"
+    - phyloWGSmut:
+        type: file
+        description: Mutation json outputted by phyloWGS
+        pattern: "*.{json.gz}"
+    - phyloWGSfolder:
+        type: file
+        description: Folder of mutations in trees output by PhyloWGS
+        pattern: ".{zip}"
+  - - meta3:
+        type: map
+        description: |
+          Groovy Map containing sample information; may also contain cohort and patient_id.
+          e.g. `[ id:'sample1', single_end:false ]`
+    - mutNetMHCpan:
+        type: file
+        description:
+          tsv formatted output from netMHCpan with the mutated neoantigens.
+        pattern: ".{tsv}"
+    - wtNetMHCpan:
+        type: file
+        description:
+          tsv formatted STDOUT file of netMHCpan. A poorly formatted file
+          of neoantigens. This contains the wild type antigens
+        pattern: ".{tsv}"
 output:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing sample information
-        e.g. `[ id:'sample1', single_end:false ]`
-
-  - versions:
-      type: file
-      description: File containing software versions
-      pattern: "versions.yml"
   - json:
-      type: file
-      description: output combined Json ready for input into the neoantigen pipeline
-      pattern: "*.{json}"
-
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g.
`[ id:'sample1', single_end:false ]` + - "*_.json": + type: file + description: output combined Json ready for input into the neoantigen pipeline + pattern: "*.{json}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@johnoooh" - "@nikhil" diff --git a/modules/msk/neoantigenutils/neoantigeninput/resources/usr/bin/generate_input.py b/modules/msk/neoantigenutils/neoantigeninput/resources/usr/bin/generate_input.py index 5aa3e09..5c0b45a 100755 --- a/modules/msk/neoantigenutils/neoantigeninput/resources/usr/bin/generate_input.py +++ b/modules/msk/neoantigenutils/neoantigeninput/resources/usr/bin/generate_input.py @@ -3,11 +3,13 @@ import json import pandas as pd import argparse +import os from Bio import pairwise2 from Bio.pairwise2 import format_alignment import numpy as np -VERSION = 1.7 +VERSION = 1.8 + def main(args): @@ -105,7 +107,7 @@ def makeChild(subTree, start): for index, row in mafdf.iterrows(): if ( - #We + # We row["Variant_Type"] == "SNP" or row["Variant_Type"] == "DEL" or row["Variant_Type"] == "INS" @@ -117,8 +119,11 @@ def makeChild(subTree, start): else: missense = 0 - print(row["Variant_Type"]) - if row["Variant_Type"] == "SNP" or row["Variant_Type"] == "DNP" or row["Variant_Type"] == "TNP": + if ( + row["Variant_Type"] == "SNP" + or row["Variant_Type"] == "DNP" + or row["Variant_Type"] == "TNP" + ): chrom_pos_dict[ str(row["Chromosome"]) + "_" @@ -208,15 +213,15 @@ def makeChild(subTree, start): ) elif row["Variant_Type"] == "INS": - print( - str(row["Chromosome"]) - + "_" - + str(row["Start_Position"]) - + "_" - + "I" - + "_" - + row["Tumor_Seq_Allele2"] - ) + # print( + # str(row["Chromosome"]) + # + "_" + # + str(row["Start_Position"]) + # + "_" + # + "I" + # + "_" + # + row["Tumor_Seq_Allele2"] + # ) chrom_pos_dict[ str(row["Chromosome"]) + "_" @@ -316,7 +321,10 @@ def convert_polysolver_hla(polyHLA): outer_dict["neoantigens"] = [] - # print(mutation_dict) + if args.bedpe_file: + bedpe_list, bedpe_dict = bedpe_load(args.bedpe_file) + + bedpe_match_dict = {} neoantigen_mut_in = pd.read_csv(args.netMHCpan_MUT_input, sep="\t") neoantigen_WT_in = pd.read_csv(args.netMHCpan_WT_input, sep="\t") @@ -330,38 +338,82 @@ def find_first_difference_index(str1, str2): return min_length WTdict = {} - + SVWTdict = {} for index_WT, row_WT in neoantigen_WT_in.iterrows(): + noposID = "" + id = "" + wtsvid = "" + row_WT_identity = trim_id(row_WT["Identity"]) + IDsplit = row_WT_identity.split("_") + if len(IDsplit[0]) < 3: + # it is from neoSV + IDsplit = row_WT_identity.split("_") + wtsvid = ( + IDsplit[0] + + IDsplit[1][0:7] + + "_" + + str(len(row_WT["peptide"])) + + "_" + + row_WT["MHC"].split("-")[1].replace(":", "").replace("*", "") + + "_" + + str(row_WT["pos"]) + ) + noposID = ( + IDsplit[0] + + "_" + + IDsplit[1][0:7] + + "_" + + str(len(row_WT["peptide"])) + + "_" + + row_WT["MHC"].split("-")[1].replace(":", "").replace("*", "") + ) + WTdict[wtsvid] = { + "affinity": row_WT["affinity"], + "peptide": row_WT["peptide"], + } + id = wtsvid + if noposID not in WTdict: + WTdict[noposID] = { + "peptides": { + row_WT["peptide"]: id + }, # This is a dict so we can match the peptide with the actual ID later + "affinity": row_WT["affinity"], + } - id = ( - row_WT["Identity"][:-2] - + "_" - + str(len(row_WT["peptide"])) - + "_" - + row_WT["MHC"].split("-")[1].replace(":", "").replace("*", "") - + "_" - + str(row_WT["pos"]) - ) + else: + WTdict[noposID]["peptides"][row_WT["peptide"]] = id - noposID = ( - 
row_WT["Identity"][:-2] - + "_" - + str(len(row_WT["peptide"])) - + "_" - + row_WT["MHC"].split("-")[1].replace(":", "").replace("*", "") - ) - WTdict[id] = {"affinity": row_WT["affinity"], "peptide": row_WT["peptide"]} + else: + id = ( + row_WT_identity[:-2] + + "_" + + str(len(row_WT["peptide"])) + + "_" + + row_WT["MHC"].split("-")[1].replace(":", "").replace("*", "") + + "_" + + str(row_WT["pos"]) + ) - # This is used as last resort for the matching. We will preferentially find the peptide matching in length as well as POS. Worst case we will default to the WT pos 0 - if noposID not in WTdict: - WTdict[noposID] = { - 'peptides' : {row_WT["peptide"]:id}, #This is a dict so we can match the peptide with the ID later - "affinity": row_WT["affinity"] - } + noposID = ( + row_WT_identity[:-2] + + "_" + + str(len(row_WT["peptide"])) + + "_" + + row_WT["MHC"].split("-")[1].replace(":", "").replace("*", "") + ) + WTdict[id] = {"affinity": row_WT["affinity"], "peptide": row_WT["peptide"]} + + # This is used as last resort for the matching. We will preferentially find the peptide matching in length as well as POS. Worst case we will default to the WT pos 0 + if noposID not in WTdict: + WTdict[noposID] = { + "peptides": { + row_WT["peptide"]: id + }, # This is a dict so we can match the peptide with the ID later + "affinity": row_WT["affinity"], + } - else: - # print(WTdict[noposID]['peptides']) - WTdict[noposID]['peptides'][row_WT["peptide"]]=id + else: + WTdict[noposID]["peptides"][row_WT["peptide"]] = id def find_most_similar_string(target, strings): max_score = -1 @@ -370,117 +422,209 @@ def find_most_similar_string(target, strings): most_similar_string2 = None first_AA_same = None first_AA_same_score = -1 - + len_target = len(target) for s in strings: - alignments = pairwise2.align.globalxx(target, s) - score = alignments[0][2] # The third element is the score - - if score > max_score2: - - if score > max_score: - max_score2 = max_score - most_similar_string2 = most_similar_string - max_score = score - most_similar_string = s - - else: - max_score2 = score - most_similar_string2 = s - - if target[0]==s[0]: - if score > first_AA_same_score: - first_AA_same = s - first_AA_same_score = score - - return most_similar_string, most_similar_string2, first_AA_same, first_AA_same_score, max_score + if len(s) == len_target: + alignments = pairwise2.align.globalxx(target, s) + score = alignments[0][2] # The third element is the score + + if score > max_score2: + + if score > max_score: + max_score2 = max_score + most_similar_string2 = most_similar_string + max_score = score + most_similar_string = s + + else: + max_score2 = score + most_similar_string2 = s + + if target[0] == s[0]: + if score > first_AA_same_score: + first_AA_same = s + first_AA_same_score = score + + return ( + most_similar_string, + most_similar_string2, + first_AA_same, + first_AA_same_score, + max_score, + ) for index_mut, row_mut in neoantigen_mut_in.iterrows(): - IDsplit = row_mut["Identity"].split('_') - if row_mut["affinity"]< 500: + row_MUT_identity = trim_id(row_mut["Identity"]) + IDsplit = row_MUT_identity.split("_") + SV = False + if row_mut["affinity"] < 500: peplen = len(row_mut["peptide"]) matchfound = False - IDsplit = row_mut["Identity"].split('_') - if (IDsplit[1][0] == "S" and IDsplit[1][1] != 'p') : - #If it is a silent mutation. Silent mutations can either be S or SY. These include intron mutations. Splices can be Sp + if IDsplit[1][0] == "S" and IDsplit[1][1] != "p": + # If it is a silent mutation. 
Silent mutations can either be S or SY. These include intron mutations. Splices can be Sp continue - # first find match in WT - WTid = ( - row_mut["Identity"][:-2] - + "_" - + str(peplen) - + "_" - + row_mut["MHC"].split("-")[1].replace(":", "").replace("*", "") - + "_" - + str(row_mut["pos"]) - ) - - noposID = ( - row_mut["Identity"][:-2] - + "_" - + str(peplen) - + "_" - + row_mut["MHC"].split("-")[1].replace(":", "").replace("*", "") - ) - - if WTid in WTdict and ('M' == IDsplit[1][0] and 'Sp' not in row_mut["Identity"]): + if row_MUT_identity.count("_") == 1: + # its an SV + SV = True + WTid = ( + IDsplit[0] + + IDsplit[1][0:8] + + "_" + + str(len(row_mut["peptide"])) + + "_" + + row_mut["MHC"].split("-")[1].replace(":", "").replace("*", "") + + "_" + + str(row_mut["pos"]) + ) + noposID = ( + IDsplit[0] + + "_" + + IDsplit[1][0:8] + + "_" + + str(len(row_mut["peptide"])) + + "_" + + row_mut["MHC"].split("-")[1].replace(":", "").replace("*", "") + ) + # this part makes the dict that matches this to the bedpe + bedpe_match_dict[row_MUT_identity] = ( + IDsplit[0] + "_" + IDsplit[1][0:4] + ) + else: + # first find match in WT + WTid = ( + row_MUT_identity[:-2] + + "_" + + str(peplen) + + "_" + + row_mut["MHC"].split("-")[1].replace(":", "").replace("*", "") + + "_" + + str(row_mut["pos"]) + ) + noposID = ( + row_MUT_identity[:-2] + + "_" + + str(peplen) + + "_" + + row_mut["MHC"].split("-")[1].replace(":", "").replace("*", "") + ) + if ( + WTid in WTdict + and ("M" == IDsplit[1][0] and "Sp" not in row_MUT_identity) + or SV == False + ): # match matchfound = True best_pepmatch = WTdict[WTid]["peptide"] + frameshift = False else: - if "-" in row_mut["Identity"] or "+" in row_mut["Identity"] and WTid in WTdict: + if ( + "-" in row_MUT_identity + or "+" in row_MUT_identity + and WTid in WTdict + or SV == False + ): # Means there is a frame shift and we don't need to do a analysis of 5' end and 3' end as 3' end is no longer recognizeable/comparable to the WT sequence at all # We can just move the windows along together. There will likely be little to no match with the WT peptides. matchfound = True best_pepmatch = WTdict[WTid]["peptide"] - # print(mutation_dict[row_mut["Identity"]]) - + frameshift = False else: - best_pepmatch,best_pepmatch2 , first_AA_same, first_AA_same_score, match_score = find_most_similar_string(row_mut["peptide"],list(WTdict[noposID]['peptides'].keys())) - - if best_pepmatch == row_mut["peptide"]: - #it seems this can happen where the row_mut is actually the canonical sequence. + # Here we take care of frameshifted peptides + frameshift = True + ( + best_pepmatch, + best_pepmatch2, + first_AA_same, + first_AA_same_score, + match_score, + ) = find_most_similar_string( + row_mut["peptide"], list(WTdict[noposID]["peptides"].keys()) + ) + if ( + best_pepmatch == row_mut["peptide"] + or best_pepmatch2 == row_mut["peptide"] + ): + # it seems this can happen where the row_mut is actually the canonical sequence. # In this case we don't want to report the peptide as a neoantigen, its not neo continue - elif (best_pepmatch[0] != row_mut["peptide"][0] and best_pepmatch2[0] == row_mut["peptide"][0]) or (best_pepmatch[-1] != row_mut["peptide"][-1] and best_pepmatch2[-1] == row_mut["peptide"][-1]): + elif ( + best_pepmatch[0] != row_mut["peptide"][0] + and best_pepmatch2[0] == row_mut["peptide"][0] + ) or ( + best_pepmatch[-1] != row_mut["peptide"][-1] + and best_pepmatch2[-1] == row_mut["peptide"][-1] + ): # We should preferentially match the first AA if we can. 
I have found that the pairwise alignment isnt always the best at this. # It will also do this when the last AA of the best match doesnt match but the last A of the second best match does best_pepmatch = best_pepmatch2 - WTid = WTdict[noposID]['peptides'][best_pepmatch] - matchfound=True + WTid = WTdict[noposID]["peptides"][best_pepmatch] + matchfound = True if matchfound == True: mut_pos = ( find_first_difference_index( - row_mut["peptide"], best_pepmatch #WTdict[WTid]["peptide"] + row_mut["peptide"], best_pepmatch # WTdict[WTid]["peptide"] ) + 1 ) - neo_dict = { - "id": row_mut["Identity"] - + "_" - + str(peplen) - + "_" - + row_mut["MHC"].split("-")[1].replace(":", "").replace("*", "") - + "_" - + str(row_mut["pos"]), - "mutation_id": mutation_dict[row_mut["Identity"]], - "HLA_gene_id": row_mut["MHC"], - "sequence": row_mut["peptide"], - "WT_sequence": best_pepmatch ,#WTdict[WTid]["peptide"], - "mutated_position": mut_pos, - "Kd": float(row_mut["affinity"]), - "KdWT": float(WTdict[WTid]["affinity"]), - } + if frameshift: + mut_pos = "Frameshifted peptide" + + if SV: + neo_dict = { + "id": row_MUT_identity + + "_" + + str(peplen) + + "_" + + str(row_mut["pos"]) + + "_" + + row_mut["MHC"].split("-")[1].replace(":", "").replace("*", "") + , + "mutation_id": bedpe_dict[ + bedpe_match_dict[row_MUT_identity] + ].id, + "HLA_gene_id": row_mut["MHC"], + "sequence": row_mut["peptide"], + "WT_sequence": best_pepmatch, # WTdict[WTid]["peptide"], + "mutated_position": mut_pos, + "Kd": float(row_mut["affinity"]), + "KdWT": float(WTdict[WTid]["affinity"]), + } + else: + neo_dict = { + "id": row_MUT_identity + + "_" + + str(peplen) + + "_" + + str(row_mut["pos"]) + + "_" + + row_mut["MHC"].split("-")[1].replace(":", "").replace("*", ""), + "mutation_id": mutation_dict[row_MUT_identity], + "HLA_gene_id": row_mut["MHC"], + "sequence": row_mut["peptide"], + "WT_sequence": best_pepmatch, # WTdict[WTid]["peptide"], + "mutated_position": mut_pos, + "Kd": float(row_mut["affinity"]), + "KdWT": float(WTdict[WTid]["affinity"]), + } outer_dict["neoantigens"].append(neo_dict) outjson = args.patient_id + "_" + args.id + "_" + ".json" with open(outjson, "w") as tstout: json.dump(outer_dict, tstout, indent=1) - # tstout.write(json.dumps(outer_dict)) + +# Sometimes the id is set as .*_M_1 and we want to make sure its _M, otherwise it will not match +def trim_id(id_string): + if "_M_" in id_string: + return id_string.partition("_M_")[0]+"_M" + elif "_V_" in id_string: + return id_string.partition("_V_")[0]+"_V" + else: + return id_string def makeID(maf_row): @@ -523,7 +667,7 @@ def makeID(maf_row): "Frame_shift_Del": "I-", "In_Frame_Ins": "If", "In_Frame_Del": "Id", - "Splice_Site": "Sp" + "Splice_Site": "Sp", } position = int(str(maf_row["Start_Position"])[0:2]) @@ -584,9 +728,176 @@ def makeID(maf_row): return identifier_key +class VariantCallingFormat(object): + """ + Class for storing SV information in VCF format, + all components are in string format + """ + + def __init__(self, chrom, pos, ref, alt): + self.chrom = chrom + self.pos = pos + self.ref = ref + self.alt = alt + + def __str__(self): + return "%s(chrom = %s, pos = %s, ref = %s, alt = %s)" % ( + self.__class__.__name__, + self.chrom, + self.pos, + self.ref, + self.alt, + ) + + def __repr__(self): + return "%s(%s, %s, %s, %s)" % ( + self.__class__.__name__, + self.chrom, + self.pos, + self.ref, + self.alt, + ) + + +class BedpeFormat(object): + """ + Class for storing SV information in BEDPE format, + all components are in string format + """ + + def 
__init__(self, chrom1, pos1, strand1, chrom2, pos2, strand2, id): + self.chrom1 = chrom1 + self.pos1 = pos1 + self.strand1 = strand1 + self.chrom2 = chrom2 + self.pos2 = pos2 + self.strand2 = strand2 + self.id = id + + def __str__(self): + return ( + "%s(chrom1 = %s, pos1 = %s, strand1 = %s, chrom2 = %s, pos2 = %s, strand2 = %s, id = %s)" + % ( + self.__class__.__name__, + self.chrom1, + self.pos1, + self.strand1, + self.chrom2, + self.pos2, + self.strand2, + self.id, + ) + ) + + def __repr__(self): + return "%s(%s, %s, %s, %s, %s, %s, %s)" % ( + self.__class__.__name__, + self.chrom1, + self.pos1, + self.strand1, + self.chrom2, + self.pos2, + self.strand2, + self.id, + ) + + +def bedpe_load(filepath): + """ + :param filepath: the absolute path of a BEDPE file + :return: a list of BEDPE objects + """ + bedpe_list = [] + bedpedict = {} + filename = os.path.basename(filepath) + line_num = 0 + print("Loading SVs from {0}.".format(filename)) + with open(filepath, "r") as f: + header = next(f) + header = header.rstrip().split("\t") + for line in f: + line_num += 1 + tmpline = line.rstrip().split("\t") + chrom1 = tmpline[header.index("chrom1")].replace("chr", "") + pos1 = tmpline[header.index("start1")] + chrom2 = tmpline[header.index("chrom2")].replace("chr", "") + pos2 = tmpline[header.index("start2")] + strand1 = tmpline[header.index("strand1")] + strand2 = tmpline[header.index("strand2")] + svclass = tmpline[header.index("svclass")] + sv_bedpe_id = tmpline[header.index("sv_id")] + custom_id = makeID_bedpe(chrom1, pos1, svclass) + bedpe = BedpeFormat( + chrom1, pos1, strand1, chrom2, pos2, strand2, sv_bedpe_id + ) + bedpe_list.append(bedpe) + bedpedict[custom_id] = bedpe + + return bedpe_list, bedpedict + + +def makeID_bedpe(chrom1, pos1, svclass): + ##ENCODING FASTA ID FOR USE IN MATCHING LATER + ALPHABET = [ + "A", + "B", + "C", + "D", + "E", + "F", + "G", + "H", + "I", + "J", + "K", + "L", + "M", + "N", + "O", + "P", + "Q", + "R", + "S", + "T", + "U", + "V", + "W", + "X", + "Y", + "Z", + ] + + position = int(str(pos1)[0:2]) + + if position < 26: + encoded_start = ALPHABET[position] + elif position < 100: + encoded_start = ALPHABET[position // 4] + + position = int(str(pos1)[-2:]) + + if position < 26: + encoded_end = ALPHABET[position] + elif position < 100: + encoded_end = ALPHABET[position // 4] + sum_remaining = sum(int(d) for d in str(pos1)[2:-2]) + + encoded_position = encoded_start + ALPHABET[sum_remaining % 26] + encoded_end + + identifier_key = ( + str(chrom1) + + "_" + + encoded_position + + "V" # This indicates structural variant. It is added in the generateMutFasta script as well but not in this function. 
+ ) + + return identifier_key + + def parse_args(): parser = argparse.ArgumentParser(description="Process input files and parameters") parser.add_argument("--maf_file", required=True, help="Path to the MAF file") + parser.add_argument("--bedpe_file", required=False, help="Path to the bedpe file") parser.add_argument( "--summary_file", required=True, help="Path to the summary file" ) @@ -641,3 +952,4 @@ def parse_args(): print("patient_data_file File:", args.patient_data_file) main(args) + diff --git a/modules/msk/neoantigenutils/neoantigeninput/tests/main.nf.test b/modules/msk/neoantigenutils/neoantigeninput/tests/main.nf.test index 778654f..577790e 100644 --- a/modules/msk/neoantigenutils/neoantigeninput/tests/main.nf.test +++ b/modules/msk/neoantigenutils/neoantigeninput/tests/main.nf.test @@ -10,7 +10,7 @@ nextflow_process { tag "neoantigenutils/neoantigeninput" tag "modules_msk" - test("neoantigenutils_neoantigeninput - json,tsv") { + test("neoantigenutils_neoantigeninput - bedpe,json,tsv") { when { params { @@ -23,6 +23,7 @@ nextflow_process { input[0] = [ [ id:'test', single_end:false ], // meta map file(params.test_data_mskcc['neoantigen']['temp_test_maf'], checkIfExists: true), + file(params.test_data_mskcc['neoantigen']['svbedpe'], checkIfExists: true), file(params.test_data_mskcc['neoantigen']['winners_hla_txt'], checkIfExists: true) ] @@ -52,6 +53,49 @@ nextflow_process { } + test("neoantigenutils_neoantigeninput - json,tsv") { + + when { + params { + folderPath = 'tst' + } + + process { + """ + + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data_mskcc['neoantigen']['temp_test_maf'], checkIfExists: true), + [], + file(params.test_data_mskcc['neoantigen']['winners_hla_txt'], checkIfExists: true) + ] + + input[1] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data_mskcc['neoantigen']['test4_summ_json'], checkIfExists: true), + file(params.test_data_mskcc['neoantigen']['test4_muts_json'], checkIfExists: true), + file(params.test_data_mskcc['neoantigen']['test4_mutass_zip'], checkIfExists: true) + ] + + input[2] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data_mskcc['neoantigen']['MUTnetMHC_tsv'], checkIfExists: true), + file(params.test_data_mskcc['neoantigen']['WTnetMHC_tsv'], checkIfExists: true) + ] + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + test("neoantigenutils_neoantigeninput - json,tsv - stub") { @@ -68,6 +112,7 @@ nextflow_process { input[0] = [ [ id:'test', single_end:false ], // meta map file('temp_test_maf'), + file('temp_test_bedpe'), file('winners_hla_txt') ] @@ -97,4 +142,4 @@ nextflow_process { } -} +} \ No newline at end of file diff --git a/modules/msk/neoantigenutils/neoantigeninput/tests/main.nf.test.snap b/modules/msk/neoantigenutils/neoantigeninput/tests/main.nf.test.snap index 48c12c3..f6c2c07 100644 --- a/modules/msk/neoantigenutils/neoantigeninput/tests/main.nf.test.snap +++ b/modules/msk/neoantigenutils/neoantigeninput/tests/main.nf.test.snap @@ -12,7 +12,7 @@ ] ], "1": [ - "versions.yml:md5,4815d54ee3ae0778628a1e51b4b21d9b" + "versions.yml:md5,a1bf48540f950da8922ef24da42f6ec5" ], "json": [ [ @@ -24,15 +24,50 @@ ] ], "versions": [ - "versions.yml:md5,4815d54ee3ae0778628a1e51b4b21d9b" + "versions.yml:md5,a1bf48540f950da8922ef24da42f6ec5" ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.2", + "nextflow": "24.04.4" }, - "timestamp": 
"2024-07-01T16:08:47.586409668" + "timestamp": "2024-11-29T15:37:10.311539" + }, + "neoantigenutils_neoantigeninput - bedpe,json,tsv": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test_patient_test_.json:md5,401207e1ed3fb2708291a8eeff5efcd7" + ] + ], + "1": [ + "versions.yml:md5,a1bf48540f950da8922ef24da42f6ec5" + ], + "json": [ + [ + { + "id": "test", + "single_end": false + }, + "test_patient_test_.json:md5,401207e1ed3fb2708291a8eeff5efcd7" + ] + ], + "versions": [ + "versions.yml:md5,a1bf48540f950da8922ef24da42f6ec5" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.04.4" + }, + "timestamp": "2024-11-29T15:36:59.107384" }, "neoantigenutils_neoantigeninput - json,tsv": { "content": [ @@ -43,11 +78,11 @@ "id": "test", "single_end": false }, - "test_patient_test_.json:md5,a4442316ba2b6f404b8eb42dd6558eae" + "test_patient_test_.json:md5,f9db7a487cbd4aad167819d885a9a9e3" ] ], "1": [ - "versions.yml:md5,4815d54ee3ae0778628a1e51b4b21d9b" + "versions.yml:md5,a1bf48540f950da8922ef24da42f6ec5" ], "json": [ [ @@ -55,18 +90,18 @@ "id": "test", "single_end": false }, - "test_patient_test_.json:md5,a4442316ba2b6f404b8eb42dd6558eae" + "test_patient_test_.json:md5,f9db7a487cbd4aad167819d885a9a9e3" ] ], "versions": [ - "versions.yml:md5,4815d54ee3ae0778628a1e51b4b21d9b" + "versions.yml:md5,a1bf48540f950da8922ef24da42f6ec5" ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.2", + "nextflow": "24.04.4" }, - "timestamp": "2024-07-01T16:08:31.862365529" + "timestamp": "2024-11-29T15:37:06.847468" } } \ No newline at end of file diff --git a/modules/msk/netmhc/meta.yml b/modules/msk/netmhc/meta.yml new file mode 100644 index 0000000..24337dc --- /dev/null +++ b/modules/msk/netmhc/meta.yml @@ -0,0 +1,67 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "netmhc" +description: write your description here +keywords: + - immune + - netmhcpan + - genomics +tools: + - "netmhcpan": + description: " Runs netMHCpan and outputs tsvs and STDout for mutated and wild type neoantigens" + homepage: "https://services.healthtech.dtu.dk/services/NetMHCpan-4.1/" + documentation: "https://services.healthtech.dtu.dk/services/NetMHCpan-4.1/" + licence: ["MIT"] + +input: + # Only when we have meta + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + + - inputMaf: + type: file + description: Maf outputtted by Tempo that was run through phyloWGS + pattern: "*.{maf}" + + - hlaFile: + type: file + description: HLA tsv outputtted by Polysolver + pattern: "winners.{tsv}" + + - inputType: + type: string + description: Allows netmhcpan to run in parallel. Should be 'MUT' or 'WT', it will kick off two jobs. make a Channel.Of('MUT','WT') outside the module as an input. Running them in series is kicked off by putting in anything other than MUT or WT. + pattern: "*" + +output: + #Only when we have meta + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + + - xls: + type: file + description: TSV/XLS file of netMHCpan. A poorly formated file of neoantigens. This contains the MUT or WT antigens + pattern: "*.xls" + + - netmhcpanoutput: + type: file + description: STDOUT file of netMHCpan. A poorly formated file of neoantigens. 
diff --git a/modules/msk/netmhc3/environment.yml b/modules/msk/netmhc3/environment.yml
new file mode 100644
index 0000000..4c59b93
--- /dev/null
+++ b/modules/msk/netmhc3/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - "YOUR-TOOL=HERE"
diff --git a/modules/msk/netmhc3/main.nf b/modules/msk/netmhc3/main.nf
new file mode 100644
index 0000000..8a4c013
--- /dev/null
+++ b/modules/msk/netmhc3/main.nf
@@ -0,0 +1,79 @@
+process NETMHC3 {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'docker://mskcc/netmhctools:1.1.0':
+        'docker.io/mskcc/netmhctools:1.1.0' }"
+
+    input:
+    tuple val(meta), path(inputFasta), path(inputSVFasta, arity: '0..*'), val(hlaString), val(inputType)
+
+    output:
+    tuple val(output_meta), path("*.xls"), emit: xls
+    tuple val(output_meta), path("*.netmhc.output"), emit: netmhcoutput
+    tuple val(output_meta), path("*.hla_*.txt"), emit: netmhc_hla_files
+    path "versions.yml", emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def hla = hlaString.trim()
+    output_meta = meta.clone()
+    output_meta.typeMut = inputType == "MUT" ? true : false
+    output_meta.fromStab = false
+    output_meta.typePan = false
+    def NETMHC_VERSION = "3.4"
+    def tmpDir = "netmhc-tmp"
+    def tmpDirFullPath = "\$PWD/${tmpDir}/" // must set full path to tmp directories for netMHC and netMHCpan to work; for some reason doesn't work with /scratch, so putting them in the process workspace
+
+    """
+    export TMPDIR=${tmpDirFullPath}
+    mkdir -p ${tmpDir}
+    chmod 777 ${tmpDir}
+
+    HLA_ACCEPTED=\$(trim_hla.py --hla ${hla})
+
+    cat ${inputSVFasta} >> ${inputFasta}
+
+    /usr/local/bin/netMHC-3.4/netMHC \
+        -a \$HLA_ACCEPTED \
+        -s \
+        -l 9 \
+        --xls=${prefix}.${inputType}.xls \
+        ${inputFasta} > ${prefix}.${inputType}.netmhc.output
+
+    mv hla_accepted.txt ${prefix}.hla_accepted.txt
+    mv hla_rejected.txt ${prefix}.hla_rejected.txt
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        netmhc: v${NETMHC_VERSION}
+    END_VERSIONS
+
+    """
+
+    stub:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def NETMHC_VERSION = "3.4"
+    output_meta = meta.clone()
+    output_meta.typeMut = inputType == "MUT" ? true : false
+    output_meta.fromStab = false
+    output_meta.typePan = false
+    """
+    touch ${prefix}.MUT.netmhc.output
+    touch ${prefix}.MUT.xls
+    touch ${prefix}.hla_accepted.txt
+    touch ${prefix}.hla_rejected.txt
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        netmhc: v${NETMHC_VERSION}
+    END_VERSIONS
+    """
+}
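Because the process clones `meta` and stamps `typeMut`/`typePan`/`fromStab` onto the emitted tuples, downstream logic can route on those flags instead of re-parsing filenames. A small illustrative sketch (the `ch_netmhc` name is an assumption):

// Split netMHC output into mutant and wild-type streams using the meta flags.
NETMHC3.out.netmhcoutput
    .branch { meta, outfile ->
        mut: meta.typeMut
        wt:  !meta.typeMut
    }
    .set { ch_netmhc } // use as ch_netmhc.mut / ch_netmhc.wt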
diff --git a/modules/msk/netmhc3/meta.yml b/modules/msk/netmhc3/meta.yml
new file mode 100644
index 0000000..f920ab2
--- /dev/null
+++ b/modules/msk/netmhc3/meta.yml
@@ -0,0 +1,104 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "netmhc3"
+description: Predicts binding of neoantigen peptides
+keywords:
+  - immune
+  - netmhc
+  - genomics
+tools:
+  - "netmhc3":
+      description:
+        "Runs netMHC and outputs TSVs and STDOUT for mutated and wild type
+        neoantigens"
+      homepage: "https://services.healthtech.dtu.dk/services/netmhc-4.1/"
+      documentation: "https://services.healthtech.dtu.dk/services/netmhc-4.1/"
+      licence: ["MIT"]
+      identifier: ""
+
+input:
+  # Only when we have meta
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. `[ id:'sample1', single_end:false ]`
+    - inputFasta:
+        type: file
+        description:
+          Multifasta containing Mutated peptides or Wildtype peptides generated
+          from generatemutfasta
+        pattern: "*.{fa}"
+    - inputSVFasta:
+        type: file
+        description:
+          Multifasta containing Mutated peptides or Wildtype peptides from
+          NeoSV
+        pattern: "*.{fa}"
+    - hlaString:
+        type: string
+        description:
+          HLA string formatted by generatehlastring. Typically a comma separated
+          string of HLAs.
+    - inputType:
+        type: string
+        description:
+          Allows netMHC to run in parallel. Should be 'MUT' or 'WT'; it
+          will kick off two jobs. Make a Channel.of('MUT','WT') outside the module as
+          an input. Passing anything other than MUT or WT runs them in series.
+        pattern: "WT,MUT"
+output:
+  # Only when we have meta
+  - netmhcoutput:
+      - output_meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1', single_end:false ]`
+      - "*.netmhc.output":
+          type: file
+          description:
+            STDOUT file of netMHC. A poorly formatted file of neoantigens. This
+            contains either the MUT or WT neoantigens. Neoantigenutils contains a parser
+            for this file.
+          pattern: "*.WT.netmhc.output,*.MUT.netmhc.output"
+
+  - xls:
+      - output_meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1', single_end:false ]`
+      - "*.xls":
+          type: file
+          description:
+            XLS file of netMHC. A poorly formatted file of neoantigens. This
+            contains the MUT or WT antigens
+          pattern: "*.WT.xls,*.MUT.xls"
+
+  - netmhc_hla_files:
+      - output_meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1', single_end:false ]`
+      - "*.hla_*.txt":
+          type: file
+          description:
+            Lists of the HLA alleles accepted and rejected by netMHC 3.4, as
+            written by trim_hla.py.
+          pattern: "*.hla_accepted.txt,*.hla_rejected.txt"
+
+  - versions:
+      - versions.yml:
+          type: file
+          description: File containing software versions
+          pattern: "versions.yml"
+
+authors:
+  - "@johnoooh"
+  - "@nikhil"
+maintainers:
+  - "@johnoooh"
+  - "@nikhil"
diff --git a/modules/msk/netmhc3/resources/usr/bin/trim_hla.py b/modules/msk/netmhc3/resources/usr/bin/trim_hla.py
new file mode 100755
index 0000000..8fa8ab4
--- /dev/null
+++ b/modules/msk/netmhc3/resources/usr/bin/trim_hla.py
@@ -0,0 +1,158 @@
+#!/usr/bin/env python3
+
+import argparse
+
+VERSION = 1.0
+
+ACCEPTED_ALLELES = [
+    "BoLA-D18.4",
+    "BoLA-HD6",
+    "BoLA-JSP.1",
+    "BoLA-T2C",
+    "BoLA-T2a",
+    "BoLA-T2b",
+    "H-2-Db",
+    "H-2-Dd",
+    "H-2-Kb",
+    "H-2-Kd",
+    "H-2-Kk",
+    "H-2-Ld",
+    "HLA-A01:01",
+    "HLA-A02:01",
+    "HLA-A02:02",
+    "HLA-A02:03",
+    "HLA-A02:06",
+    "HLA-A02:11",
+    "HLA-A02:12",
+    "HLA-A02:16",
+    "HLA-A02:17",
+    "HLA-A02:19",
+    "HLA-A02:50",
+    "HLA-A03:01",
+    "HLA-A11:01",
+    "HLA-A23:01",
+    "HLA-A24:02",
+    "HLA-A24:03",
+    "HLA-A25:01",
+    "HLA-A26:01",
+    "HLA-A26:02",
+    "HLA-A26:03",
+    "HLA-A29:02",
+    "HLA-A30:01",
+    "HLA-A30:02",
+    "HLA-A31:01",
+    "HLA-A32:01",
+    "HLA-A32:07",
+    "HLA-A32:15",
+    "HLA-A33:01",
+    "HLA-A66:01",
+    "HLA-A68:01",
+    "HLA-A68:02",
+    "HLA-A68:23",
+    "HLA-A69:01",
+    "HLA-A80:01",
+    "HLA-B07:02",
+    "HLA-B08:01",
+    "HLA-B08:02",
+    "HLA-B08:03",
+    "HLA-B14:02",
+    "HLA-B15:01",
+    "HLA-B15:02",
+    "HLA-B15:03",
+    "HLA-B15:09",
+    "HLA-B15:17",
+    "HLA-B18:01",
+    "HLA-B27:05",
+    "HLA-B27:20",
+    "HLA-B35:01",
+    "HLA-B35:03",
+    "HLA-B38:01",
+    "HLA-B39:01",
+    "HLA-B40:01",
+    "HLA-B40:02",
+    "HLA-B40:13",
+    "HLA-B42:01",
+    "HLA-B44:02",
+    "HLA-B44:03",
+    "HLA-B45:01",
+    "HLA-B46:01",
+    "HLA-B48:01",
+    "HLA-B51:01",
+    "HLA-B53:01",
+    "HLA-B54:01",
+    "HLA-B57:01",
+    "HLA-B58:01",
+    "HLA-B73:01",
+    "HLA-B83:01",
+    "HLA-C03:03",
+    "HLA-C04:01",
+    "HLA-C05:01",
+    "HLA-C06:02",
+    "HLA-C07:01",
+    "HLA-C07:02",
+    "HLA-C08:02",
+    "HLA-C12:03",
+    "HLA-C14:02",
+    "HLA-C15:02",
+    "HLA-E01:01",
+    "Mamu-A01",
+    "Mamu-A02",
+    "Mamu-A07",
+    "Mamu-A11",
+    "Mamu-A20102",
+    "Mamu-A2201",
+    "Mamu-A2601",
+    "Mamu-A70103",
+    "Mamu-B01",
+    "Mamu-B03",
+    "Mamu-B08",
+    "Mamu-B1001",
+    "Mamu-B17",
+    "Mamu-B3901",
+    "Mamu-B52",
+    "Mamu-B6601",
+    "Mamu-B8301",
+    "Mamu-B8701",
+    "Patr-A0101",
+    "Patr-A0301",
+    "Patr-A0401",
+    "Patr-A0701",
+    "Patr-A0901",
+    "Patr-B0101",
+    "Patr-B1301",
+    "Patr-B2401",
+    "SLA-10401",
+    "SLA-20401",
+    "SLA-30401",
+]
+
+
+def trim_hla(hla_string):
+    rejected_list = []
+    accepted_list = []
+    hla_alleles = hla_string.split(",")
+    for single_hla_allele in hla_alleles:
+        if single_hla_allele not in ACCEPTED_ALLELES:
+            rejected_list.append(single_hla_allele)
+        else:
+            accepted_list.append(single_hla_allele)
+    with open("hla_accepted.txt", "w") as accepted_file:
+        accepted_hlas = "\n".join(accepted_list)
+        accepted_file.write(accepted_hlas)
+    with open("hla_rejected.txt", "w") as rejected_file:
+        rejected_hlas = "\n".join(rejected_list)
+        rejected_file.write(rejected_hlas)
+    accepted_output = ",".join(accepted_list)
+    print(accepted_output)
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Only accept HLA alleles that are accepted by netmhc 3.4")
+    parser.add_argument("--hla", help="Hla allele string")
+    parser.add_argument("-v", "--version", action="version", version="v{}".format(VERSION))
+    args = parser.parse_args()
+    trim_hla(args.hla)
+
+
+if __name__ == "__main__":
+    main()
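For readers following the allele gating, the core of trim_hla.py is a simple partition of the comma-separated HLA string. A Groovy mirror of the same step, illustrative only and using a deliberately truncated allele set:

// Partition alleles into (accepted, rejected) against a known list.
def acceptedAlleles = ['HLA-A01:01', 'HLA-A24:02', 'HLA-B39:01'] // truncated example set
def hlaString = 'HLA-A01:01,HLA-A99:99,HLA-B39:01'
def (accepted, rejected) = hlaString.split(',').toList().split { it in acceptedAlleles }
println accepted.join(',') // -> HLA-A01:01,HLA-B39:01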
diff --git a/modules/msk/netmhc3/tests/main.nf.test b/modules/msk/netmhc3/tests/main.nf.test
new file mode 100644
index 0000000..393d6ba
--- /dev/null
+++ b/modules/msk/netmhc3/tests/main.nf.test
@@ -0,0 +1,171 @@
+nextflow_process {
+
+    name "Test Process NETMHC3"
+    script "../main.nf"
+    process "NETMHC3"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "netmhc3"
+    tag "modules_msk"
+
+    test("netmhc3 - MUT,SV - xls,output,fa") {
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    file(file(params.test_data_mskcc['neoantigen']['MUT_sequence_fa']), checkIfExists: true),
+                    file(file(params.test_data_mskcc['neoantigen']['svfa']), checkIfExists: true),
+                    "HLA-A24:02,HLA-A24:02,HLA-B39:01,HLA-B39:01,HLA-C07:01,HLA-C06:02",
+                    "MUT"
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    process.out.versions,
+                    process.out.netmhcoutput[0][0],
+                    file(process.out.xls[0][1]).name,
+                    file(process.out.netmhcoutput[0][1]).name
+                    ).match()
+                }
+            )
+        }
+
+    }
+
+    test("netmhc3 - WT,SV - xls,output,fa") {
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    file(file(params.test_data_mskcc['neoantigen']['WT_sequence_fa']), checkIfExists: true),
+                    file(file(params.test_data_mskcc['neoantigen']['wtsvfa']), checkIfExists: true),
+                    "HLA-A24:02,HLA-A24:02,HLA-B39:01,HLA-B39:01,HLA-C07:01,HLA-C06:02",
+                    "WT"
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    process.out.versions,
+                    process.out.netmhcoutput[0][0],
+                    file(process.out.xls[0][1]).name,
+                    file(process.out.netmhcoutput[0][1]).name
+                    ).match()
+                }
+            )
+        }
+
+    }
+
+    test("netmhc3 - MUT - xls,output,fa") {
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    file(file(params.test_data_mskcc['neoantigen']['MUT_sequence_fa']), checkIfExists: true),
+                    [],
+                    "HLA-A24:02,HLA-A24:02,HLA-B39:01,HLA-B39:01,HLA-C07:01,HLA-C06:02",
+                    "MUT"
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    process.out.versions,
+                    process.out.netmhcoutput[0][0],
+                    file(process.out.xls[0][1]).name,
+                    file(process.out.netmhcoutput[0][1]).name
+                    ).match()
+                }
+            )
+        }
+
+    }
+
+    test("netmhc3 - WT - xls,output,fa") {
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    file(file(params.test_data_mskcc['neoantigen']['WT_sequence_fa']), checkIfExists: true),
+                    [],
+                    "HLA-A24:02,HLA-A24:02,HLA-B39:01,HLA-B39:01,HLA-C07:01,HLA-C06:02",
+                    "WT"
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    process.out.versions,
+                    process.out.netmhcoutput[0][0],
+                    file(process.out.xls[0][1]).name,
+                    file(process.out.netmhcoutput[0][1]).name
+                    ).match()
+                }
+            )
+        }
+
+    }
+
+
+
+    test("netmhc3 - xls,output,fa - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    file('MUT_sequence_fa'),
+                    file('svfa'),
+                    "HLA",
+                    "MUT"
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    process.out.versions,
+                    process.out.netmhcoutput[0][0],
+                    file(process.out.xls[0][1]).name,
+                    file(process.out.netmhcoutput[0][1]).name
+                    ).match()
+                }
+            )
+        }
+
+    }
+
+}
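The stub test above matters in CI, where the licensed netMHC binaries are absent: `-stub` swaps the script block for the stub block, so only output wiring is exercised. A pared-down sketch of the pattern (hypothetical test name and inputs):

// Stub-only check: succeeds without the netMHC container or license.
test("netmhc3 - wiring only - stub") {
    options "-stub"
    when {
        process {
            """
            input[0] = [ [ id:'t' ], file('fa'), [], 'HLA', 'MUT' ]
            """
        }
    }
    then { assert process.success }
}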
+ "id": "test", + "single_end": false, + "typeMut": true, + "fromStab": false, + "typePan": false + }, + "test.MUT.xls", + "test.MUT.netmhc.output" + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-25T09:00:51.333935874" + }, + "netmhc3 - xls,output,fa - stub": { + "content": [ + [ + "versions.yml:md5,41df6059e67545fe2fe2734514557bb7" + ], + { + "id": "test", + "single_end": false, + "typeMut": true, + "fromStab": false, + "typePan": false + }, + "test.MUT.xls", + "test.MUT.netmhc.output" + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-25T09:02:00.341675799" + }, + "netmhc3 - WT,SV - xls,output,fa": { + "content": [ + [ + "versions.yml:md5,41df6059e67545fe2fe2734514557bb7" + ], + { + "id": "test", + "single_end": false, + "typeMut": false, + "fromStab": false, + "typePan": false + }, + "test.WT.xls", + "test.WT.netmhc.output" + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-25T09:01:14.49522341" + }, + "netmhc3 - WT - xls,output,fa": { + "content": [ + [ + "versions.yml:md5,41df6059e67545fe2fe2734514557bb7" + ], + { + "id": "test", + "single_end": false, + "typeMut": false, + "fromStab": false, + "typePan": false + }, + "test.WT.xls", + "test.WT.netmhc.output" + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-25T09:01:55.331175249" + }, + "netmhc3 - MUT - xls,output,fa": { + "content": [ + [ + "versions.yml:md5,41df6059e67545fe2fe2734514557bb7" + ], + { + "id": "test", + "single_end": false, + "typeMut": true, + "fromStab": false, + "typePan": false + }, + "test.MUT.xls", + "test.MUT.netmhc.output" + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-25T09:01:34.648692712" + } +} \ No newline at end of file diff --git a/modules/msk/netmhc3/tests/tags.yml b/modules/msk/netmhc3/tests/tags.yml new file mode 100644 index 0000000..e431a8e --- /dev/null +++ b/modules/msk/netmhc3/tests/tags.yml @@ -0,0 +1,2 @@ +netmhc3: + - "modules/msk/netmhc3/**" diff --git a/modules/msk/netmhcpan4/environment.yml b/modules/msk/netmhcpan4/environment.yml new file mode 100644 index 0000000..4c59b93 --- /dev/null +++ b/modules/msk/netmhcpan4/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "YOUR-TOOL=HERE" diff --git a/modules/msk/netmhcpan4/main.nf b/modules/msk/netmhcpan4/main.nf new file mode 100644 index 0000000..d6c603f --- /dev/null +++ b/modules/msk/netmhcpan4/main.nf @@ -0,0 +1,77 @@ +process NETMHCPAN4 { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'docker://mskcc/netmhctools:1.1.0': + 'docker.io/mskcc/netmhctools:1.1.0' }" + + input: + tuple val(meta), path(inputFasta), path(inputSVFasta, arity: '0..*'), val(hlaString), val(inputType) + + output: + tuple val(output_meta), path("*.xls"), emit: xls + tuple val(output_meta), path("*.netmhcpan.output"), emit: netmhcpanoutput + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def hla = hlaString.trim() + output_meta = meta.clone() + output_meta.typeMut = inputType == "MUT" ? 
diff --git a/modules/msk/netmhcpan4/meta.yml b/modules/msk/netmhcpan4/meta.yml
new file mode 100644
index 0000000..da25d9d
--- /dev/null
+++ b/modules/msk/netmhcpan4/meta.yml
@@ -0,0 +1,85 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "netmhcpan4"
+description: Predicts binding of neoantigen peptides
+keywords:
+  - immune
+  - netmhcpan
+  - genomics
+tools:
+  - "netmhcpan":
+      description:
+        "Runs netMHCpan and outputs TSVs and STDOUT for mutated and wild
+        type neoantigens"
+      homepage: "https://services.healthtech.dtu.dk/services/NetMHCpan-4.1/"
+      documentation: "https://services.healthtech.dtu.dk/services/NetMHCpan-4.1/"
+      licence: ["MIT"]
+      identifier: biotools:netmhcpan
+
+input:
+  # Only when we have meta
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. `[ id:'sample1', single_end:false ]`
+    - inputFasta:
+        type: file
+        description:
+          Multifasta containing Mutated peptides or Wildtype peptides generated
+          from generatemutfasta
+        pattern: "*.{fa}"
+    - inputSVFasta:
+        type: file
+        description:
+          Multifasta containing Mutated peptides or Wildtype peptides from
+          NeoSV
+        pattern: "*.{fa}"
+    - hlaString:
+        type: string
+        description:
+          HLA string formatted by generatehlastring. Typically a comma separated
+          string of HLAs.
+    - inputType:
+        type: string
+        description:
+          Allows netMHCpan to run in parallel. Should be 'MUT' or 'WT'; it
+          will kick off two jobs. Make a Channel.of('MUT','WT') outside the module as
+          an input. Passing anything other than MUT or WT runs them in series.
+        pattern: "WT,MUT"
+output:
+  # Only when we have meta
+  - xls:
+      - output_meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1', single_end:false ]`
+      - "*.xls":
+          type: file
+          description: XLS file of netMHCpan. A poorly formatted file of neoantigens. This contains the MUT or WT antigens
+          pattern: "*.WT.xls,*.MUT.xls"
+  - netmhcpanoutput:
+      - output_meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1', single_end:false ]`
+      - "*.netmhcpan.output":
+          type: file
+          description:
+            STDOUT file of netMHCpan. A poorly formatted file of neoantigens. This
+            contains either the MUT or WT neoantigens. Neoantigenutils contains a parser
+            for this file.
+          pattern: "*.WT.netmhcpan.output,*.MUT.netmhcpan.output"
+  - versions:
+      - versions.yml:
+          type: file
+          description: File containing software versions
+          pattern: "versions.yml"
+authors:
+  - "@johnoooh"
+  - "@nikhil"
+maintainers:
+  - "@johnoooh"
+  - "@nikhil"
diff --git a/modules/msk/netmhcpan4/tests/main.nf.test b/modules/msk/netmhcpan4/tests/main.nf.test
new file mode 100644
index 0000000..81d7ac9
--- /dev/null
+++ b/modules/msk/netmhcpan4/tests/main.nf.test
@@ -0,0 +1,171 @@
+nextflow_process {
+
+    name "Test Process NETMHCPAN4"
+    script "../main.nf"
+    process "NETMHCPAN4"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "netmhcpan4"
+    tag "modules_msk"
+
+    test("netmhcpan4 - MUT,SV - xls,output,fa") {
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    file(file(params.test_data_mskcc['neoantigen']['MUT_sequence_fa']), checkIfExists: true),
+                    file(file(params.test_data_mskcc['neoantigen']['svfa']), checkIfExists: true),
+                    "HLA-A24:02,HLA-A24:02,HLA-B39:01,HLA-B39:01,HLA-C07:01,HLA-C06:02",
+                    "MUT"
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    process.out.versions,
+                    process.out.netmhcpanoutput[0][0],
+                    file(process.out.xls[0][1]).name,
+                    file(process.out.netmhcpanoutput[0][1]).name
+                    ).match()
+                }
+            )
+        }
+
+    }
+
+    test("netmhcpan4 - WT,SV - xls,output,fa") {
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    file(file(params.test_data_mskcc['neoantigen']['WT_sequence_fa']), checkIfExists: true),
+                    file(file(params.test_data_mskcc['neoantigen']['wtsvfa']), checkIfExists: true),
+                    "HLA-A24:02,HLA-A24:02,HLA-B39:01,HLA-B39:01,HLA-C07:01,HLA-C06:02",
+                    "WT"
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    process.out.versions,
+                    process.out.netmhcpanoutput[0][0],
+                    file(process.out.xls[0][1]).name,
+                    file(process.out.netmhcpanoutput[0][1]).name
+                    ).match()
+                }
+            )
+        }
+
+    }
+
+    test("netmhcpan4 - MUT - xls,output,fa") {
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    file(file(params.test_data_mskcc['neoantigen']['MUT_sequence_fa']), checkIfExists: true),
+                    [],
+                    "HLA-A24:02,HLA-A24:02,HLA-B39:01,HLA-B39:01,HLA-C07:01,HLA-C06:02",
+                    "MUT"
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    process.out.versions,
+                    process.out.netmhcpanoutput[0][0],
+                    file(process.out.xls[0][1]).name,
+                    file(process.out.netmhcpanoutput[0][1]).name
+                    ).match()
+                }
+            )
+        }
+
+    }
+
+    test("netmhcpan4 - WT - xls,output,fa") {
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    file(file(params.test_data_mskcc['neoantigen']['WT_sequence_fa']), checkIfExists: true),
+                    [],
+                    "HLA-A24:02,HLA-A24:02,HLA-B39:01,HLA-B39:01,HLA-C07:01,HLA-C06:02",
+                    "WT"
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    process.out.versions,
+                    process.out.netmhcpanoutput[0][0],
+                    file(process.out.xls[0][1]).name,
+                    file(process.out.netmhcpanoutput[0][1]).name
+                    ).match()
+                }
+            )
+        }
+
+    }
+
+
+
+    test("netmhcpan4 - xls,output,fa - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    file('MUT_sequence_fa'),
+                    file('svfa'),
+                    "HLA",
+                    "MUT"
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    process.out.versions,
+                    process.out.netmhcpanoutput[0][0],
+                    file(process.out.xls[0][1]).name,
+                    file(process.out.netmhcpanoutput[0][1]).name
+                    ).match()
+                }
+            )
+        }
+
+    }
+
+}
diff --git a/modules/msk/netmhcpan4/tests/main.nf.test.snap b/modules/msk/netmhcpan4/tests/main.nf.test.snap
new file mode 100644
index 0000000..fa054ef
--- /dev/null
+++ b/modules/msk/netmhcpan4/tests/main.nf.test.snap
@@ -0,0 +1,107 @@
+{
+    "netmhcpan4 - WT - xls,output,fa": {
+        "content": [
+            [
+                "versions.yml:md5,25030ae1a63e68bd94347a4d30cc2413"
+            ],
+            {
+                "id": "test",
+                "single_end": false,
+                "typeMut": false,
+                "fromStab": false,
+                "typePan": true
+            },
+            "test.WT.xls",
+            "test.WT.netmhcpan.output"
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "24.10.1"
+        },
+        "timestamp": "2024-11-25T10:29:28.616914826"
+    },
+    "netmhcpan4 - MUT - xls,output,fa": {
+        "content": [
+            [
+                "versions.yml:md5,25030ae1a63e68bd94347a4d30cc2413"
+            ],
+            {
+                "id": "test",
+                "single_end": false,
+                "typeMut": true,
+                "fromStab": false,
+                "typePan": true
+            },
+            "test.MUT.xls",
+            "test.MUT.netmhcpan.output"
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "24.10.1"
+        },
+        "timestamp": "2024-11-25T10:26:02.108432464"
+    },
+    "netmhcpan4 - xls,output,fa - stub": {
+        "content": [
+            [
+                "versions.yml:md5,25030ae1a63e68bd94347a4d30cc2413"
+            ],
+            {
+                "id": "test",
+                "single_end": false,
+                "typeMut": true,
+                "fromStab": false,
+                "typePan": true
+            },
+            "test.MUT.xls",
+            "test.MUT.netmhcpan.output"
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "24.10.1"
+        },
+        "timestamp": "2024-11-25T10:29:33.956254062"
+    },
+    "netmhcpan4 - MUT,SV - xls,output,fa": {
+        "content": [
+            [
+                "versions.yml:md5,25030ae1a63e68bd94347a4d30cc2413"
+            ],
+            {
+                "id": "test",
+                "single_end": false,
+                "typeMut": true,
+                "fromStab": false,
+                "typePan": true
+            },
+            "test.MUT.xls",
+            "test.MUT.netmhcpan.output"
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "24.10.1"
+        },
+        "timestamp": "2024-11-25T10:20:37.334041838"
+    },
+    "netmhcpan4 - WT,SV - xls,output,fa": {
+        "content": [
+            [
+                "versions.yml:md5,25030ae1a63e68bd94347a4d30cc2413"
+            ],
+            {
+                "id": "test",
+                "single_end": false,
+                "typeMut": false,
+                "fromStab": false,
+                "typePan": true
+            },
+            "test.WT.xls",
+            "test.WT.netmhcpan.output"
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "24.10.1"
+        },
+        "timestamp": "2024-11-25T10:24:39.739383639"
+    }
+}
\ No newline at end of file
diff --git a/modules/msk/netmhcpan4/tests/tags.yml b/modules/msk/netmhcpan4/tests/tags.yml
new file mode 100644
index 0000000..7e2aa7f
--- /dev/null
+++ b/modules/msk/netmhcpan4/tests/tags.yml
@@ -0,0 +1,2 @@
+netmhcpan4:
+  - "modules/msk/netmhcpan4/**"
diff --git a/modules/msk/netmhcstabpan/environment.yml b/modules/msk/netmhcstabpan/environment.yml
index b87b3c7..4c59b93 100644
--- a/modules/msk/netmhcstabpan/environment.yml
+++ b/modules/msk/netmhcstabpan/environment.yml
@@ -1,10 +1,7 @@
-name: netmhcstabpan
-
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
 channels:
   - conda-forge
   - bioconda
-  - defaults
-
 dependencies:
-  - netmhcpan
-  - netmhcstabpan
+  - "YOUR-TOOL=HERE"
diff --git a/modules/msk/netmhcstabpan/main.nf b/modules/msk/netmhcstabpan/main.nf
index 316195f..da1dce3 100644
--- a/modules/msk/netmhcstabpan/main.nf
+++ b/modules/msk/netmhcstabpan/main.nf
@@ -4,11 +4,11 @@ process NETMHCSTABPAN {

     conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'docker://mskcc/netmhctools:1.0.0':
-        'docker.io/mskcc/netmhctools:1.0.0' }"
+        'docker://mskcc/netmhctools:1.1.0':
+        'docker.io/mskcc/netmhctools:1.1.0' }"

     input:
-    tuple val(meta), path(inputFasta), val(hlaString), val(inputType)
+    tuple val(meta), path(inputFasta), path(inputSVFasta, arity: '0..*'), val(hlaString), val(inputType)

     output:
@@ -25,11 +25,20 @@ process NETMHCSTABPAN {
     output_meta = meta.clone()
     output_meta.typeMut = inputType == "MUT" ? true : false
     output_meta.fromStab = true
+    output_meta.typePan = true
     def NETMHCPAN_VERSION = "4.1"
     def NETMHCSTABPAN_VERSION = "1.0"
+
+    def tmpDir = "netmhc-tmp"
+    def tmpDirFullPath = "\$PWD/${tmpDir}/" // must set full path to tmp directories for netMHC and netMHCpan to work; for some reason doesn't work with /scratch, so putting them in the process workspace

     """
+    export TMPDIR=${tmpDirFullPath}
+    mkdir -p ${tmpDir}
+    chmod 777 ${tmpDir}
+
+    cat ${inputSVFasta} >> ${inputFasta}

     /usr/local/bin/netMHCstabpan-${NETMHCSTABPAN_VERSION}/netMHCstabpan \
         -s -1 \
@@ -52,6 +61,7 @@ process NETMHCSTABPAN {
     output_meta = meta.clone()
     output_meta.typeMut = inputType == "MUT" ? true : false
     output_meta.fromStab = true
+    output_meta.typePan = true
     def NETMHCPAN_VERSION = "4.1"
     def NETMHCSTABPAN_VERSION = "1.0"
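The `arity: '0..*'` declaration on `inputSVFasta` is what lets callers pass `[]` when NeoSV output is absent, as the updated tests below do; with nothing staged, the `cat` step appends nothing in practice. A minimal caller-side sketch (the channel and meta keys are illustrative assumptions):

// Fill the optional SV fasta slot per sample; [] keeps the tuple shape.
ch_stabpan_in = ch_samples.map { meta, fasta ->
    def svFasta = meta.sv_fasta ? file(meta.sv_fasta) : [] // hypothetical meta key
    [ meta, fasta, svFasta, meta.hla_string, 'MUT' ]
}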
diff --git a/modules/msk/netmhcstabpan/meta.yml b/modules/msk/netmhcstabpan/meta.yml
index ec5e22c..d9bc3dc 100644
--- a/modules/msk/netmhcstabpan/meta.yml
+++ b/modules/msk/netmhcstabpan/meta.yml
@@ -1,7 +1,8 @@
----
 # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
 name: "netmhcstabpan"
-description: Runs netMHCpan and netMHCstabpan and outputs STDout for mutated and wild type neoantigens"
+description:
+  Runs netMHCpan and netMHCstabpan and outputs STDOUT for mutated and wild
+  type neoantigens
 keywords:
   - immune
   - netmhcstabpan
@@ -9,55 +10,61 @@ keywords:
   - genomics
 tools:
   - "netmhcstabpan":
-      description: " Runs netMHCstabpan and netMHCpan then outputs tsvs and STDout for mutated and wild type neoantigens"
+      description:
+        "Runs netMHCstabpan and netMHCpan then outputs TSVs and STDOUT for
+        mutated and wild type neoantigens"
       homepage: "https://services.healthtech.dtu.dk/services/NetMHCstabpan-1.0/"
       documentation: "https://services.healthtech.dtu.dk/services/NetMHCstabpan-1.0/"
       licence: ["MIT"]
-
-  - "netmhcpan":
-      description: " Runs netMHCpan and outputs tsvs and STDout for mutated and wild type neoantigens"
-      homepage: "https://services.healthtech.dtu.dk/services/NetMHCpan-4.1/"
-      documentation: "https://services.healthtech.dtu.dk/services/NetMHCpan-4.1/"
-      licence: ["MIT"]
+      identifier: ""

 input:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing sample information
-        e.g. `[ id:'sample1', single_end:false ]`
-
-  - inputMaf:
-      type: file
-      description: Maf outputtted by Tempo that was run through phyloWGS
-      pattern: "*.{maf}"
-
-  - hlaString:
-      type: string
-      description: HLA in string format. e.g. HLA-A24:02
-
-  - inputType:
-      type: string
-      description: Allows netmhcstabpan to run in parallel. Should be 'MUT' or 'WT', it will kick off two jobs. make a Channel.Of('MUT','WT') outside the module as an input. Running them in series is kicked off by putting in anything other than MUT or WT.
-      pattern: "*"
-
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. `[ id:'sample1', single_end:false ]`
+    - inputFasta:
+        type: file
+        description:
+          Multifasta containing Mutated peptides or Wildtype peptides generated
+          from generatemutfasta
+        pattern: "*.{fa}"
+    - inputSVFasta:
+        type: file
+        description:
+          Multifasta containing Mutated peptides or Wildtype peptides from
+          NeoSV
+        pattern: "*.{fa}"
+    - hlaString:
+        type: string
+        description: HLA in string format. e.g. HLA-A24:02
+    - inputType:
+        type: string
+        description:
+          Allows netMHCstabpan to run in parallel. Should be 'MUT' or 'WT';
+          it will kick off two jobs. Make a Channel.of('MUT','WT') outside the module
+          as an input. Passing anything other than MUT or WT runs them in series.
+        pattern: "WT,MUT"
 output:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing sample information
-        e.g. `[ id:'sample1', single_end:false ]`
-
-  - versions:
-      type: file
-      description: File containing software versions
-      pattern: "versions.yml"
   - netmhcstabpanoutput:
-      type: file
-      description: STDOUT file of netMHCstabpan runs for MUT and WT. A poorly formated file of neoantigens. Neoantigenutils contains a parser for this file
-      pattern: "*.WT.netmhcstabpan.output,*.MUT.netmhcstabpan.output"
-
+      - output_meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1', single_end:false ]`
+      - "*.netmhcstabpan.output":
+          type: file
+          description:
+            STDOUT file of netMHCstabpan runs for MUT and WT. A poorly formatted
+            file of neoantigens. Neoantigenutils contains a parser for this file
+          pattern: "*.WT.netmhcstabpan.output,*.MUT.netmhcstabpan.output"
+  - versions:
+      - versions.yml:
+          type: file
+          description: File containing software versions
+          pattern: "versions.yml"
 authors:
   - "@johnoooh"
   - "@nikhil"
diff --git a/modules/msk/netmhcstabpan/tests/main.nf.test b/modules/msk/netmhcstabpan/tests/main.nf.test
index 390f061..d62f758 100644
--- a/modules/msk/netmhcstabpan/tests/main.nf.test
+++ b/modules/msk/netmhcstabpan/tests/main.nf.test
@@ -9,6 +9,66 @@ nextflow_process {
    tag "netmhcstabpan"
    tag "modules_msk"

+    test("netmhcstabpan - MUT,SV - xls,output,fa") {
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    file(file(params.test_data_mskcc['neoantigen']['MUT_sequence_fa']), checkIfExists: true),
+                    file(file(params.test_data_mskcc['neoantigen']['svfa']), checkIfExists: true),
+                    "HLA-A24:02,HLA-A24:02,HLA-B39:01,HLA-B39:01,HLA-C07:01,HLA-C06:02",
+                    "MUT"
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    process.out.versions,
+                    process.out.netmhcstabpanoutput[0][0],
+                    file(process.out.netmhcstabpanoutput[0][1]).name
+                    ).match()
+                }
+            )
+        }
+
+    }
+
+    test("netmhcstabpan - WT,SV - xls,output,fa") {
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    file(file(params.test_data_mskcc['neoantigen']['WT_sequence_fa']), checkIfExists: true),
+                    file(file(params.test_data_mskcc['neoantigen']['svfa']), checkIfExists: true),
+                    "HLA-A24:02,HLA-A24:02,HLA-B39:01,HLA-B39:01,HLA-C07:01,HLA-C06:02",
+                    "WT"
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    process.out.versions,
+                    process.out.netmhcstabpanoutput[0][0],
+                    file(process.out.netmhcstabpanoutput[0][1]).name
+                    ).match()
+                }
+            )
+        }
+
+    }
+
    test("netmhcstabpan - MUT - xls,output,fa") {

        when {
@@ -17,6 +77,7 @@ nextflow_process {
                input[0] = [
                    [ id:'test', single_end:false ], // meta map
                    file(file(params.test_data_mskcc['neoantigen']['MUT_sequence_fa']), checkIfExists: true),
+                    [],
                    "HLA-A24:02,HLA-A24:02,HLA-B39:01,HLA-B39:01,HLA-C07:01,HLA-C06:02",
                    "MUT"
                ]
@@ -46,6 +107,7 @@ nextflow_process {
                input[0] = [
                    [ id:'test', single_end:false ], // meta map
                    file(file(params.test_data_mskcc['neoantigen']['WT_sequence_fa']), checkIfExists: true),
+                    [],
                    "HLA-A24:02,HLA-A24:02,HLA-B39:01,HLA-B39:01,HLA-C07:01,HLA-C06:02",
                    "WT"
                ]
@@ -67,6 +129,7 @@ nextflow_process {

    }

+
    test("netmhcstabpan - xls,output,fa - stub") {

        options "-stub"
@@ -78,6 +141,7 @@ nextflow_process {
                input[0] = [
                    [ id:'test', single_end:false ], // meta map
                    file('MUT_sequence_fa'),
+                    file('svfa'),
                    "HLA",
                    "MUT"
                ]
diff --git a/modules/msk/netmhcstabpan/tests/main.nf.test.snap b/modules/msk/netmhcstabpan/tests/main.nf.test.snap
index 0c95eca..eda2c92 100644
--- a/modules/msk/netmhcstabpan/tests/main.nf.test.snap
+++ b/modules/msk/netmhcstabpan/tests/main.nf.test.snap
@@ -8,15 +8,36 @@
                "id": "test",
                "single_end": false,
                "typeMut": true,
-                "fromStab": true
+                "fromStab": true,
+                "typePan": true
            },
            "test.MUT.netmhcstabpan.output"
        ],
        "meta": {
-            "nf-test": "0.8.4",
-            "nextflow": "24.04.2"
+            "nf-test": "0.9.2",
+            "nextflow": "24.10.1"
        },
-        "timestamp": "2024-06-13T11:11:05.586612"
+        "timestamp": "2024-11-25T09:20:13.007454444"
+    },
+    "netmhcstabpan - WT,SV - xls,output,fa": {
+        "content": [
+            [
+                "versions.yml:md5,d1b3afd365748a44098c5642fad6c94a"
+            ],
+            {
+                "id": "test",
+                "single_end": false,
+                "typeMut": false,
+                "fromStab": true,
+                "typePan": true
+            },
+            "test.WT.netmhcstabpan.output"
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "24.10.1"
+        },
+        "timestamp": "2024-11-25T09:15:21.840510308"
    },
    "netmhcstabpan - xls,output,fa - stub": {
        "content": [
@@ -27,15 +48,16 @@
                "id": "test",
                "single_end": false,
                "typeMut": true,
-                "fromStab": true
+                "fromStab": true,
+                "typePan": true
            },
            "test.MUT.netmhcstabpan.output"
        ],
        "meta": {
-            "nf-test": "0.8.4",
-            "nextflow": "24.04.2"
+            "nf-test": "0.9.2",
+            "nextflow": "24.10.1"
        },
-        "timestamp": "2024-06-13T11:12:06.099529"
+        "timestamp": "2024-11-25T09:25:26.180428087"
    },
    "netmhcstabpan - WT - xls,output,fa": {
        "content": [
@@ -46,14 +68,35 @@
                "id": "test",
                "single_end": false,
                "typeMut": false,
-                "fromStab": true
+                "fromStab": true,
+                "typePan": true
            },
            "test.WT.netmhcstabpan.output"
        ],
        "meta": {
-            "nf-test": "0.8.4",
-            "nextflow": "24.04.2"
+            "nf-test": "0.9.2",
+            "nextflow": "24.10.1"
+        },
+        "timestamp": "2024-11-25T09:25:21.015015849"
+    },
+    "netmhcstabpan - MUT,SV - xls,output,fa": {
+        "content": [
+            [
+                "versions.yml:md5,d1b3afd365748a44098c5642fad6c94a"
+            ],
+            {
+                "id": "test",
+                "single_end": false,
+                "typeMut": true,
+                "fromStab": true,
+                "typePan": true
+            },
+            "test.MUT.netmhcstabpan.output"
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "24.10.1"
        },
-        "timestamp": "2024-06-13T11:12:01.701587"
+        "timestamp": "2024-11-25T09:09:53.461848085"
    }
 }
\ No newline at end of file
diff --git a/modules/msk/phylowgs/createinput/environment.yml b/modules/msk/phylowgs/createinput/environment.yml
index 0d609eb..2db32da 100644
--- a/modules/msk/phylowgs/createinput/environment.yml
+++ b/modules/msk/phylowgs/createinput/environment.yml
@@ -1,8 +1,8 @@
 ---
-name: "phylowgs_createinput"
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
 channels:
   - conda-forge
   - bioconda
-  - defaults
+
 dependencies:
-  - "PHYLOWGS"
+  - "YOUR-TOOL=HERE"
diff --git a/modules/msk/phylowgs/createinput/meta.yml b/modules/msk/phylowgs/createinput/meta.yml
index 9b8f5f9..1ab61f1 100644
--- a/modules/msk/phylowgs/createinput/meta.yml
+++ b/modules/msk/phylowgs/createinput/meta.yml
@@ -1,4 +1,3 @@
----
 name: "phylowgs_createinput"
 description: Create input files for phylowgs
 keywords:
@@ -11,32 +10,41 @@ tools:
      homepage: "https://genomebiology.biomedcentral.com/articles/10.1186/s13059-015-0602-8"
      tool_dev_url: "https://github.com/mskcc/phylowgs"

+      identifier: ""

 input:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing sample information
-        e.g. `[ id:'sample1', single_end:false ]`
-  - cnv:
-      type: file
-      description: converted cnv file for phylowgs
-      pattern: "*.txt"
-
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. `[ id:'sample1', single_end:false ]`
+    - unfilteredmaf:
+        type: file
+        description: unfiltered maf file with read counts
+        pattern: "*.maf"
+    - cnv:
+        type: file
+        description: converted cnv file for phylowgs
+        pattern: "*.txt"
 output:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing sample information
-        e.g. `[ id:'sample1', single_end:false ]`
-  - versions:
-      type: file
-      description: File containing software versions
-      pattern: "versions.yml"
   - phylowgsinput:
-      type: file
-      description: cnv_data.txt and ssm_data.txt
-      pattern: "*.txt"
-
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1', single_end:false ]`
+      - cnv_data.txt:
+          type: file
+          description: cnv_data.txt and ssm_data.txt
+          pattern: "*.txt"
+      - ssm_data.txt:
+          type: file
+          description: cnv_data.txt and ssm_data.txt
+          pattern: "*.txt"
+  - versions:
+      - versions.yml:
+          type: file
+          description: File containing software versions
+          pattern: "versions.yml"
 authors:
   - "@pintoa1-mskcc"
 maintainers:
diff --git a/modules/msk/phylowgs/multievolve/environment.yml b/modules/msk/phylowgs/multievolve/environment.yml
index d6a0ba5..92c6a48 100644
--- a/modules/msk/phylowgs/multievolve/environment.yml
+++ b/modules/msk/phylowgs/multievolve/environment.yml
@@ -1,9 +1,7 @@
 ---
 # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
-name: "phylowgs_multievolve"
 channels:
   - conda-forge
   - bioconda
-  - defaults
 dependencies:
-  - "PHYLOWGS"
+  - "PHYLOWGS=NA"
diff --git a/modules/msk/phylowgs/multievolve/main.nf b/modules/msk/phylowgs/multievolve/main.nf
index d7f6e9c..117c78d 100644
--- a/modules/msk/phylowgs/multievolve/main.nf
+++ b/modules/msk/phylowgs/multievolve/main.nf
@@ -19,13 +19,11 @@ process PHYLOWGS_MULTIEVOLVE {
     script:
     def args = task.ext.args ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"
-    def threads = task.cpus * 2

     """
     python2 \\
         /usr/bin/multievolve.py \\
         ${args} \\
-        --num-chains ${threads} \\
         --ssms ${ssm_data} \\
         --cnvs ${cnv_data}
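With the hard-coded `--num-chains` (2 × task.cpus) removed from the command line, chain count now flows in through `ext.args`, as the updated test config further below also shows. A typical pipeline-side configuration (sampling values are placeholders):

// Supply phyloWGS chain/sampling settings via configuration, not the module.
process {
    withName: 'PHYLOWGS_MULTIEVOLVE' {
        ext.args = { "--num-chains ${task.cpus} --burnin-samples 1000 --mcmc-samples 2500" }
    }
}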
diff --git a/modules/msk/phylowgs/multievolve/meta.yml b/modules/msk/phylowgs/multievolve/meta.yml
index 0ccb999..68e290b 100644
--- a/modules/msk/phylowgs/multievolve/meta.yml
+++ b/modules/msk/phylowgs/multievolve/meta.yml
@@ -1,4 +1,3 @@
----
 # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
 name: "phylowgs_multievolve"
 description: Create trees from input from phylowgs_createinput
@@ -12,36 +11,37 @@ tools:
      homepage: "https://genomebiology.biomedcentral.com/articles/10.1186/s13059-015-0602-8"
      tool_dev_url: "https://github.com/mskcc/phylowgs"

+      identifier: ""

 input:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing sample information
-        e.g. `[ id:'sample1', single_end:false ]`
-  - cnv_data:
-      type: file
-      description: copy number input data from phylowgs_createinput
-      pattern: "*.{txt}"
-  - ssm_data:
-      type: file
-      description: mutation input data from phylowgs_createinput
-      pattern: "*.{txt}"
-
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. `[ id:'sample1', single_end:false ]`
+    - cnv_data:
+        type: file
+        description: copy number input data from phylowgs_createinput
+        pattern: "*.{txt}"
+    - ssm_data:
+        type: file
+        description: mutation input data from phylowgs_createinput
+        pattern: "*.{txt}"
 output:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing sample information
-        e.g. `[ id:'sample1', single_end:false ]`
   - trees:
-      type: file
-      description: Zip file containing the completed trees
-      pattern: "trees.zip"
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1', single_end:false ]`
+      - chains/trees.zip:
+          type: file
+          description: Zip file containing the completed trees
+          pattern: "trees.zip"
   - versions:
-      type: file
-      description: File containing software versions
-      pattern: "versions.yml"
-
+      - versions.yml:
+          type: file
+          description: File containing software versions
+          pattern: "versions.yml"
 authors:
   - "@nikhil"
 maintainers:
diff --git a/modules/msk/phylowgs/multievolve/tests/nextflow.config b/modules/msk/phylowgs/multievolve/tests/nextflow.config
index 43986ff..2af6889 100644
--- a/modules/msk/phylowgs/multievolve/tests/nextflow.config
+++ b/modules/msk/phylowgs/multievolve/tests/nextflow.config
@@ -5,6 +5,6 @@ params {
 process {
    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
    withName: 'PHYLOWGS_MULTIEVOLVE' {
-        ext.args = '--burnin-samples 2 --mcmc-samples 2'
+        ext.args = '--num-chains 2 --burnin-samples 2 --mcmc-samples 2'
    }
 }
diff --git a/modules/msk/phylowgs/parsecnvs/environment.yml b/modules/msk/phylowgs/parsecnvs/environment.yml
index f54f5d8..4c59b93 100644
--- a/modules/msk/phylowgs/parsecnvs/environment.yml
+++ b/modules/msk/phylowgs/parsecnvs/environment.yml
@@ -1,9 +1,7 @@
 ---
 # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
-name: "phylowgs_parsecnvs"
 channels:
   - conda-forge
   - bioconda
-  - defaults
 dependencies:
-  - "PHYLOWGS"
+  - "YOUR-TOOL=HERE"
diff --git a/modules/msk/phylowgs/parsecnvs/meta.yml b/modules/msk/phylowgs/parsecnvs/meta.yml
index 43e1b64..80b7beb 100644
--- a/modules/msk/phylowgs/parsecnvs/meta.yml
+++ b/modules/msk/phylowgs/parsecnvs/meta.yml
@@ -1,4 +1,3 @@
----
 # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
 name: "phylowgs_parsecnvs"
 description: parse cnvs from FACETS for input to phylowgs
@@ -12,33 +11,33 @@ tools:
      homepage: "https://genomebiology.biomedcentral.com/articles/10.1186/s13059-015-0602-8"
      tool_dev_url: "https://github.com/mskcc/phylowgs"

+      identifier: ""

 input:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing sample information
-        e.g. `[ id:'sample1', single_end:false ]`
-
-  - facetsgenelevel:
-      type: file
-      description: single sample facets gene level output
-      pattern: "*.{txt}"
-
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. `[ id:'sample1', single_end:false ]`
+    - facetsgenelevel:
+        type: file
+        description: single sample facets gene level output
+        pattern: "*.{txt}"
 output:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing sample information
-        e.g. `[ id:'sample1', single_end:false ]`
   - cnv:
-      type: file
-      description: converted cnv file for phylowgs upstream processing
-      pattern: "*.txt"
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1', single_end:false ]`
+      - cnvs.txt:
+          type: file
+          description: converted cnv file for phylowgs upstream processing
+          pattern: "*.txt"
   - versions:
-      type: file
-      description: File containing software versions
-      pattern: "versions.yml"
-
+      - versions.yml:
+          type: file
+          description: File containing software versions
+          pattern: "versions.yml"
 authors:
   - "@pintoa1-mskcc"
 maintainers:
diff --git a/modules/msk/phylowgs/writeresults/environment.yml b/modules/msk/phylowgs/writeresults/environment.yml
index 8aed198..4c59b93 100644
--- a/modules/msk/phylowgs/writeresults/environment.yml
+++ b/modules/msk/phylowgs/writeresults/environment.yml
@@ -1,9 +1,7 @@
 ---
 # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
-name: "phylowgs_writeresults"
 channels:
   - conda-forge
   - bioconda
-  - defaults
 dependencies:
-  - "PHYLOWGS"
+  - "YOUR-TOOL=HERE"
diff --git a/modules/msk/phylowgs/writeresults/meta.yml b/modules/msk/phylowgs/writeresults/meta.yml
index 8829447..e82f004 100644
--- a/modules/msk/phylowgs/writeresults/meta.yml
+++ b/modules/msk/phylowgs/writeresults/meta.yml
@@ -1,4 +1,3 @@
----
 # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
 name: "phylowgs_writeresults"
 description: Write results from trees from phylowgs_multievolve
@@ -12,40 +11,53 @@ tools:
      homepage: "https://genomebiology.biomedcentral.com/articles/10.1186/s13059-015-0602-8"
      tool_dev_url: "https://github.com/mskcc/phylowgs"

+      identifier: ""

 input:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing sample information
-        e.g. `[ id:'sample1', single_end:false ]`
-  - trees:
-      type: file
-      description: zip folder containing tree data from multievolve
-      pattern: "*.zip"
-
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. `[ id:'sample1', single_end:false ]`
+    - trees:
+        type: file
+        description: zip folder containing tree data from multievolve
+        pattern: "*.zip"
 output:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing sample information
-        e.g. `[ id:'sample1', single_end:false ]`
   - summ:
-      type: file
-      description: Output file for JSON-formatted tree summaries
-      pattern: "*.summ.json.gz"
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1', single_end:false ]`
+      - "*.summ.json.gz":
+          type: file
+          description: Output file for JSON-formatted tree summaries
+          pattern: "*.summ.json.gz"
   - muts:
-      type: file
-      description: Output file for JSON-formatted list of mutations
-      pattern: "*.muts.json.gz"
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1', single_end:false ]`
+      - "*.muts.json.gz":
+          type: file
+          description: Output file for JSON-formatted list of mutations
+          pattern: "*.muts.json.gz"
   - mutass:
-      type: file
-      description: Output file for JSON-formatted list of SSMs and CNVs
-      pattern: "*.mutass.zip"
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1', single_end:false ]`
+      - "*.mutass.zip":
+          type: file
+          description: Output file for JSON-formatted list of SSMs and CNVs
+          pattern: "*.mutass.zip"
   - versions:
-      type: file
-      description: File containing software versions
-      pattern: "versions.yml"
-
+      - versions.yml:
+          type: file
+          description: File containing software versions
+          pattern: "versions.yml"
 authors:
   - "@nikhil"
 maintainers:
diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml
index 2121492..6f5b867 100644
--- a/modules/nf-core/multiqc/environment.yml
+++ b/modules/nf-core/multiqc/environment.yml
@@ -1,7 +1,5 @@
-name: multiqc
 channels:
   - conda-forge
   - bioconda
-  - defaults
 dependencies:
-  - bioconda::multiqc=1.23
+  - bioconda::multiqc=1.25.1
diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf
index 459dfea..cc0643e 100644
--- a/modules/nf-core/multiqc/main.nf
+++ b/modules/nf-core/multiqc/main.nf
@@ -3,8 +3,8 @@ process MULTIQC {

     conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/multiqc:1.23--pyhdfd78af_0' :
-        'biocontainers/multiqc:1.23--pyhdfd78af_0' }"
+        'https://depot.galaxyproject.org/singularity/multiqc:1.25.1--pyhdfd78af_0' :
+        'biocontainers/multiqc:1.25.1--pyhdfd78af_0' }"

     input:
     path multiqc_files, stageAs: "?/*"
@@ -25,9 +25,10 @@ process MULTIQC {

     script:
     def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ? "--filename ${task.ext.prefix}.html" : ''
     def config = multiqc_config ? "--config $multiqc_config" : ''
     def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : ''
-    def logo = multiqc_logo ? /--cl-config 'custom_logo: "${multiqc_logo}"'/ : ''
+    def logo = multiqc_logo ? "--cl-config 'custom_logo: \"${multiqc_logo}\"'" : ''
     def replace = replace_names ? "--replace-names ${replace_names}" : ''
     def samples = sample_names ? "--sample-names ${sample_names}" : ''
     """
@@ -35,6 +36,7 @@ process MULTIQC {
         --force \\
         $args \\
         $config \\
+        $prefix \\
         $extra_config \\
         $logo \\
         $replace \\
@@ -50,7 +52,7 @@ process MULTIQC {
     stub:
     """
     mkdir multiqc_data
-    touch multiqc_plots
+    mkdir multiqc_plots
     touch multiqc_report.html

     cat <<-END_VERSIONS > versions.yml
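The new `prefix` hook means a custom report name is a one-line config change; when `ext.prefix` is unset, the ternary above falls back to MultiQC's default naming. For example (the prefix value is illustrative):

// Rename the MultiQC report via task.ext.prefix -> --filename <prefix>.html
process {
    withName: 'MULTIQC' {
        ext.prefix = 'neoantigenpipeline_multiqc'
    }
}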
"--sample-names ${sample_names}" : '' """ @@ -35,6 +36,7 @@ process MULTIQC { --force \\ $args \\ $config \\ + $prefix \\ $extra_config \\ $logo \\ $replace \\ @@ -50,7 +52,7 @@ process MULTIQC { stub: """ mkdir multiqc_data - touch multiqc_plots + mkdir multiqc_plots touch multiqc_report.html cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml index 382c08c..b16c187 100644 --- a/modules/nf-core/multiqc/meta.yml +++ b/modules/nf-core/multiqc/meta.yml @@ -1,5 +1,6 @@ name: multiqc -description: Aggregate results from bioinformatics analyses across many samples into a single report +description: Aggregate results from bioinformatics analyses across many samples into + a single report keywords: - QC - bioinformatics tools @@ -12,53 +13,59 @@ tools: homepage: https://multiqc.info/ documentation: https://multiqc.info/docs/ licence: ["GPL-3.0-or-later"] + identifier: biotools:multiqc input: - - multiqc_files: - type: file - description: | - List of reports / files recognised by MultiQC, for example the html and zip output of FastQC - - multiqc_config: - type: file - description: Optional config yml for MultiQC - pattern: "*.{yml,yaml}" - - extra_multiqc_config: - type: file - description: Second optional config yml for MultiQC. Will override common sections in multiqc_config. - pattern: "*.{yml,yaml}" - - multiqc_logo: - type: file - description: Optional logo file for MultiQC - pattern: "*.{png}" - - replace_names: - type: file - description: | - Optional two-column sample renaming file. First column a set of - patterns, second column a set of corresponding replacements. Passed via - MultiQC's `--replace-names` option. - pattern: "*.{tsv}" - - sample_names: - type: file - description: | - Optional TSV file with headers, passed to the MultiQC --sample_names - argument. - pattern: "*.{tsv}" + - - multiqc_files: + type: file + description: | + List of reports / files recognised by MultiQC, for example the html and zip output of FastQC + - - multiqc_config: + type: file + description: Optional config yml for MultiQC + pattern: "*.{yml,yaml}" + - - extra_multiqc_config: + type: file + description: Second optional config yml for MultiQC. Will override common sections + in multiqc_config. + pattern: "*.{yml,yaml}" + - - multiqc_logo: + type: file + description: Optional logo file for MultiQC + pattern: "*.{png}" + - - replace_names: + type: file + description: | + Optional two-column sample renaming file. First column a set of + patterns, second column a set of corresponding replacements. Passed via + MultiQC's `--replace-names` option. + pattern: "*.{tsv}" + - - sample_names: + type: file + description: | + Optional TSV file with headers, passed to the MultiQC --sample_names + argument. 
diff --git a/modules/nf-core/multiqc/tests/main.nf.test b/modules/nf-core/multiqc/tests/main.nf.test
index 6aa27f4..33316a7 100644
--- a/modules/nf-core/multiqc/tests/main.nf.test
+++ b/modules/nf-core/multiqc/tests/main.nf.test
@@ -8,6 +8,8 @@ nextflow_process {
    tag "modules_nfcore"
    tag "multiqc"

+    config "./nextflow.config"
+
    test("sarscov2 single-end [fastqc]") {

        when {
diff --git a/modules/nf-core/multiqc/tests/main.nf.test.snap b/modules/nf-core/multiqc/tests/main.nf.test.snap
index 45e95e5..2fcbb5f 100644
--- a/modules/nf-core/multiqc/tests/main.nf.test.snap
+++ b/modules/nf-core/multiqc/tests/main.nf.test.snap
@@ -2,14 +2,14 @@
    "multiqc_versions_single": {
        "content": [
            [
-                "versions.yml:md5,87904cd321df21fac35d18f0fc01bb19"
+                "versions.yml:md5,41f391dcedce7f93ca188f3a3ffa0916"
            ]
        ],
        "meta": {
-            "nf-test": "0.8.4",
-            "nextflow": "24.04.2"
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.4"
        },
-        "timestamp": "2024-07-10T12:41:34.562023"
+        "timestamp": "2024-10-02T17:51:46.317523"
    },
    "multiqc_stub": {
        "content": [
@@ -17,25 +17,25 @@
                "multiqc_report.html",
                "multiqc_data",
                "multiqc_plots",
-                "versions.yml:md5,87904cd321df21fac35d18f0fc01bb19"
+                "versions.yml:md5,41f391dcedce7f93ca188f3a3ffa0916"
            ]
        ],
        "meta": {
-            "nf-test": "0.8.4",
-            "nextflow": "24.04.2"
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.4"
        },
-        "timestamp": "2024-07-10T11:27:11.933869532"
+        "timestamp": "2024-10-02T17:52:20.680978"
    },
    "multiqc_versions_config": {
        "content": [
            [
-                "versions.yml:md5,87904cd321df21fac35d18f0fc01bb19"
+                "versions.yml:md5,41f391dcedce7f93ca188f3a3ffa0916"
            ]
        ],
        "meta": {
-            "nf-test": "0.8.4",
-            "nextflow": "24.04.2"
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.4"
        },
-        "timestamp": "2024-07-10T11:26:56.709849369"
+        "timestamp": "2024-10-02T17:52:09.185842"
    }
 }
\ No newline at end of file
diff --git a/modules/nf-core/multiqc/tests/nextflow.config b/modules/nf-core/multiqc/tests/nextflow.config
new file mode 100644
index 0000000..c537a6a
--- /dev/null
+++ b/modules/nf-core/multiqc/tests/nextflow.config
@@ -0,0 +1,5 @@
+process {
+    withName: 'MULTIQC' {
+        ext.prefix = null
+    }
+}
diff --git a/nextflow.config b/nextflow.config
index c7b308c..fab34c1 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -8,15 +8,16 @@
 // Global default params, used in configs
 params {

-    // TODO nf-core: Specify your pipeline's command line flags
     // Input options
     input                      = null
+
+    // References
     genome                     = null
     igenomes_base              = 's3://ngi-igenomes/igenomes/'
     igenomes_ignore            = false
-    fasta                      = null// MultiQC options
+
+    // MultiQC options
     multiqc_config             = null
     multiqc_title              = null
     multiqc_logo               = null
@@ -24,190 +25,185 @@ params {
     multiqc_methods_description = null

     // Boilerplate options
-    outdir                     = null
-    publish_dir_mode           = 'copy'
-    email                      = null
-    email_on_fail              = null
-    plaintext_email            = false
-    monochrome_logs            = false
-    hook_url                   = null
-    help                       = false
-    version                    = false
+    outdir                       = null
+    publish_dir_mode             = 'copy'
+    email                        = null
+    email_on_fail                = null
+    plaintext_email              = false
+    monochrome_logs              = false
+    hook_url                     = null
+    help                         = false
+    help_full                    = false
+    show_hidden                  = false
+    version                      = false
+    pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/'

     // Config options
     config_profile_name        = null
     config_profile_description = null
+
     custom_config_version      = 'master'
     custom_config_base         = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}"
     config_profile_contact     = null
     config_profile_url         = null

-    // Max resource options
-    // Defaults only, expecting to be overwritten
-    max_memory                 = '128.GB'
-    max_cpus                   = 16
-    max_time                   = '240.h'
-
     // Schema validation default options
-    validationFailUnrecognisedParams = false
-    validationLenientMode            = false
-    validationSchemaIgnoreParams     = 'genomes,igenomes_base'
-    validationShowHiddenParams       = false
-    validate_params                  = true
-
+    validate_params            = true
 }

 // Load base.config by default for all pipelines
 includeConfig 'conf/base.config'

-// Load nf-core custom profiles from different Institutions
-try {
-    includeConfig "${params.custom_config_base}/nfcore_custom.config"
-} catch (Exception e) {
-    System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config")
-}
-
-// Load mskcc/neoantigenpipeline custom profiles from different institutions.
-// Warning: Uncomment only if a pipeline-specific institutional config already exists on nf-core/configs!
-// try {
-//   includeConfig "${params.custom_config_base}/pipeline/neoantigenpipeline.config"
-// } catch (Exception e) {
-//   System.err.println("WARNING: Could not load nf-core/config/neoantigenpipeline profiles: ${params.custom_config_base}/pipeline/neoantigenpipeline.config")
-// }

 profiles {
    debug {
-        dumpHashes             = true
-        process.beforeScript   = 'echo $HOSTNAME'
-        cleanup                = false
+        dumpHashes                                   = true
+        process.beforeScript                         = 'echo $HOSTNAME'
+        cleanup                                      = false
        nextflow.enable.configProcessNamesValidation = true
    }
    conda {
-        conda.enabled          = true
-        docker.enabled         = false
-        singularity.enabled    = false
-        podman.enabled         = false
-        shifter.enabled        = false
-        charliecloud.enabled   = false
-        channels               = ['conda-forge', 'bioconda', 'defaults']
-        apptainer.enabled      = false
+        conda.enabled           = true
+        docker.enabled          = false
+        singularity.enabled     = false
+        podman.enabled          = false
+        shifter.enabled         = false
+        charliecloud.enabled    = false
+        conda.channels          = ['conda-forge', 'bioconda']
+        apptainer.enabled       = false
    }
    mamba {
-        conda.enabled          = true
-        conda.useMamba         = true
-        docker.enabled         = false
-        singularity.enabled    = false
-        podman.enabled         = false
-        shifter.enabled        = false
-        charliecloud.enabled   = false
-        apptainer.enabled      = false
+        conda.enabled           = true
+        conda.useMamba          = true
+        docker.enabled          = false
+        singularity.enabled     = false
+        podman.enabled          = false
+        shifter.enabled         = false
+        charliecloud.enabled    = false
+        apptainer.enabled       = false
    }
    docker {
-        docker.enabled         = true
-        conda.enabled          = false
-        singularity.enabled    = false
-        podman.enabled         = false
-        shifter.enabled        = false
-        charliecloud.enabled   = false
-        apptainer.enabled      = false
-        docker.runOptions      = '-u $(id -u):$(id -g)'
+        docker.enabled          = true
+        conda.enabled           = false
+        singularity.enabled     = false
+        podman.enabled          = false
+        shifter.enabled         = false
+        charliecloud.enabled    = false
+        apptainer.enabled       = false
docker.runOptions = '-u $(id -u):$(id -g)' } arm { - docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' } singularity { - singularity.enabled = true - singularity.autoMounts = true - conda.enabled = false - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + singularity.enabled = true + singularity.autoMounts = true + conda.enabled = false + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } podman { - podman.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + podman.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } shifter { - shifter.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + shifter.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } charliecloud { - charliecloud.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - apptainer.enabled = false + charliecloud.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + apptainer.enabled = false } apptainer { - apptainer.enabled = true - apptainer.autoMounts = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false + apptainer.enabled = true + apptainer.autoMounts = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + wave { + apptainer.ociAutoPull = true + singularity.ociAutoPull = true + wave.enabled = true + wave.freeze = true + wave.strategy = 'conda,container' } gitpod { - executor.name = 'local' - executor.cpus = 4 - executor.memory = 8.GB + executor.name = 'local' + executor.cpus = 4 + executor.memory = 8.GB } test { includeConfig 'conf/test.config' } test_full { includeConfig 'conf/test_full.config' } prod { includeConfig 'conf/prod.config' } } -// Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile -// Will not be used unless Apptainer / Docker / Podman / Singularity are enabled -// Set to your registry if you have a mirror of containers -apptainer.registry = 'quay.io' -docker.registry = 'quay.io' -podman.registry = 'quay.io' -singularity.registry = 'quay.io' +// Load nf-core custom profiles from different Institutions +includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? "${params.custom_config_base}/nfcore_custom.config" : '/dev/null' -// Nextflow plugins -plugins { - id 'nf-validation@1.1.3' // Validation of pipeline parameters and creation of an input channel from a sample sheet -} +// Load mskcc/neoantigenpipeline custom profiles from different institutions. 
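[Reviewer note] The offline-safe include just above replaces the old try/catch guard: when NXF_OFFLINE is set, or params.custom_config_base is empty, the ternary resolves to '/dev/null', which Nextflow parses as an empty config instead of failing the run. A minimal sketch of the same idiom for any optional remote config; params.site_config_base and site.config are hypothetical names, not part of this PR:

// Skip the remote include cleanly when running offline
includeConfig !System.getenv('NXF_OFFLINE') && params.site_config_base ? "${params.site_config_base}/site.config" : '/dev/null'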
+// TODO nf-core: Optionally, you can add a pipeline-specific nf-core config at https://github.com/nf-core/configs +// includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? "${params.custom_config_base}/pipeline/neoantigenpipeline.config" : "/dev/null" + +// Set default registry for Apptainer, Docker, Podman, Charliecloud and Singularity independent of -profile +// Will not be used unless Apptainer / Docker / Podman / Charliecloud / Singularity are enabled +// Set to your registry if you have a mirror of containers +apptainer.registry = 'quay.io' +docker.registry = 'quay.io' +podman.registry = 'quay.io' +singularity.registry = 'quay.io' +charliecloud.registry = 'quay.io' // Load igenomes.config if required -if (!params.igenomes_ignore) { - includeConfig 'conf/igenomes.config' -} else { - params.genomes = [:] -} +includeConfig !params.igenomes_ignore ? 'conf/igenomes.config' : 'conf/igenomes_ignored.config' + // Export these variables to prevent local Python/R libraries from conflicting with those in the container // The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. // See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. env { PYTHONNOUSERSITE = 1 - R_PROFILE_USER = "/.Rprofile" - R_ENVIRON_USER = "/.Renviron" - JULIA_DEPOT_PATH = "/usr/local/share/julia" + R_PROFILE_USER = '/.Rprofile' + R_ENVIRON_USER = '/.Renviron' + JULIA_DEPOT_PATH = '/usr/local/share/julia' + TMP = '/tmp' + TMPDIR = '/tmp' } -// Capture exit codes from upstream processes when piping -process.shell = ['/bin/bash', '-euo', 'pipefail'] +// Set bash options +process.shell = '''\ +bash + +set -e # Exit if a tool returns a non-zero status/exit code +set -u # Treat unset variables and parameters as an error +set -o pipefail # Returns the status of the last command to exit with a non-zero status or zero if all successfully execute +set -C # No clobber - prevent output redirection from overwriting files. +''' // Disable process selector warnings by default. Use debug profile to enable warnings. nextflow.enable.configProcessNamesValidation = false -def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') +def trace_timestamp = new java.util.Date().format('yyyy-MM-dd_HH-mm-ss') timeline { enabled = true file = "${params.outdir}/pipeline_info/execution_timeline_${trace_timestamp}.html" @@ -231,43 +227,25 @@ manifest { homePage = 'https://github.com/mskcc/neoantigenpipeline' description = """Pipeline for computing neoantigen qualities from DNA and RNA-Seq data""" mainScript = 'main.nf' - nextflowVersion = '!>=23.04.0' - version = '1.0.0' + nextflowVersion = '!>=24.04.0' + version = '1.1.0' doi = '' } -// Load modules.config for DSL2 module specific options -includeConfig 'conf/modules.config' +// Nextflow plugins +plugins { + id 'nf-schema@2.1.1' // Validation of pipeline parameters and creation of an input channel from a sample sheet +} -// Function to ensure that resource requirements don't go beyond -// a maximum limit -def check_max(obj, type) { - if (type == 'memory') { - try { - if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) - return params.max_memory as nextflow.util.MemoryUnit - else - return obj - } catch (all) { - println " ### ERROR ### Max memory '${params.max_memory}' is not valid! 
Using default value: $obj" - return obj - } - } else if (type == 'time') { - try { - if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) - return params.max_time as nextflow.util.Duration - else - return obj - } catch (all) { - println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj" - return obj - } - } else if (type == 'cpus') { - try { - return Math.min( obj, params.max_cpus as int ) - } catch (all) { - println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! Using default value: $obj" - return obj - } +validation { + defaultIgnoreParams = ['genomes'] + help { + enabled = true + command = "nextflow run $manifest.name -profile <docker/singularity/.../institute> --input samplesheet.csv --outdir <OUTDIR>" + fullParameter = 'help_full' + showHiddenParameter = 'show_hidden' } } + +// Load modules.config for DSL2 module specific options +includeConfig 'conf/modules.config' diff --git a/nextflow_schema.json b/nextflow_schema.json index 2bfd850..3121136 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -1,10 +1,10 @@ { - "$schema": "http://json-schema.org/draft-07/schema", + "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "https://raw.githubusercontent.com/mskcc/neoantigenpipeline/master/nextflow_schema.json", "title": "mskcc/neoantigenpipeline pipeline parameters", "description": "Pipeline for computing neoantigen qualities from DNA and RNA-Seq data", "type": "object", - "definitions": { + "$defs": { "input_output_options": { "title": "Input/output options", "type": "object", @@ -71,6 +71,14 @@ "fa_icon": "fas fa-ban", "hidden": true, "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`." + }, + "igenomes_base": { + "type": "string", + "format": "directory-path", + "description": "The base path to the igenomes reference files", + "fa_icon": "fas fa-ban", + "hidden": true, + "default": "s3://ngi-igenomes/igenomes/" + } } } }, @@ -122,41 +130,6 @@ - "max_job_request_options": { - "title": "Max job request options", - "type": "object", - "fa_icon": "fab fa-acquisitions-incorporated", - "description": "Set the top limit for requested resources for any single job.", - "help_text": "If you are running on a smaller system, a pipeline step requesting more resources than are available may cause the Nextflow to stop the run with an error. These options allow you to cap the maximum resources requested by any single job so that the pipeline will run on your system.\n\nNote that you can not _increase_ the resources requested by any job using these options. For that you will need your own configuration file. See [the nf-core website](https://nf-co.re/usage/configuration) for details.", - "properties": { - "max_cpus": { - "type": "integer", - "description": "Maximum number of CPUs that can be requested for any single job.", - "default": 16, - "fa_icon": "fas fa-microchip", - "hidden": true, - "help_text": "Use to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1`" - }, - "max_memory": { - "type": "string", - "description": "Maximum amount of memory that can be requested for any single job.", - "default": "128.GB", - "fa_icon": "fas fa-memory", - "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$", - "hidden": true, - "help_text": "Use to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. 
`--max_memory '8.GB'`" - }, - "max_time": { - "type": "string", - "description": "Maximum amount of time that can be requested for any single job.", - "default": "240.h", - "fa_icon": "far fa-clock", - "pattern": "^(\\d+\\.?\\s*(s|m|h|d|day)\\s*)+$", - "hidden": true, - "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`" - } - } - }, "generic_options": { "title": "Generic options", "type": "object", @@ -164,12 +137,6 @@ "description": "Less common options for the pipeline, typically set in a config file.", "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", "properties": { - "help": { - "type": "boolean", - "description": "Display help text.", - "fa_icon": "fas fa-question-circle", - "hidden": true - }, "version": { "type": "boolean", "description": "Display version and exit.", @@ -182,7 +149,14 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "enum": [ + "symlink", + "rellink", + "link", + "copy", + "copyNoFollow", + "move" + ], "hidden": true }, "email_on_fail": { @@ -245,45 +219,28 @@ "fa_icon": "fas fa-check-square", "hidden": true }, - "validationShowHiddenParams": { - "type": "boolean", - "fa_icon": "far fa-eye-slash", - "description": "Show all params when using `--help`", - "hidden": true, - "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." - }, - "validationFailUnrecognisedParams": { - "type": "boolean", - "fa_icon": "far fa-check-circle", - "description": "Validation of parameters fails when an unrecognised parameter is found.", - "hidden": true, - "help_text": "By default, when an unrecognised parameter is found, it returns a warinig." - }, - "validationLenientMode": { - "type": "boolean", + "pipelines_testdata_base_path": { + "type": "string", "fa_icon": "far fa-check-circle", - "description": "Validation of parameters in lenient more.", - "hidden": true, - "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)." 
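[Reviewer note] Dropping max_cpus / max_memory / max_time from the schema (together with the check_max helper removed from nextflow.config above) lines up with the manifest bump to Nextflow !>=24.04.0, where per-process caps are expressed with the built-in resourceLimits directive. A minimal sketch, assuming the caps land in conf/base.config; the numbers are illustrative only, not values taken from this PR:

process {
    // Nextflow clamps any task request that exceeds these limits
    resourceLimits = [ cpus: 16, memory: 128.GB, time: 240.h ]
}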
+ "description": "Base URL or local path to location of pipeline test dataset files", + "default": "https://raw.githubusercontent.com/nf-core/test-datasets/", + "hidden": true } } } }, "allOf": [ { - "$ref": "#/definitions/input_output_options" - }, - { - "$ref": "#/definitions/reference_genome_options" + "$ref": "#/$defs/input_output_options" }, { - "$ref": "#/definitions/institutional_config_options" + "$ref": "#/$defs/reference_genome_options" }, { - "$ref": "#/definitions/max_job_request_options" + "$ref": "#/$defs/institutional_config_options" }, { - "$ref": "#/definitions/generic_options" + "$ref": "#/$defs/generic_options" } ] } diff --git a/subworkflows/local/utils_nfcore_neoantigenpipeline_pipeline/main.nf b/subworkflows/local/utils_nfcore_neoantigenpipeline_pipeline/main.nf index 86d4c91..9c9ac7b 100644 --- a/subworkflows/local/utils_nfcore_neoantigenpipeline_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_neoantigenpipeline_pipeline/main.nf @@ -8,29 +8,25 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { UTILS_NFVALIDATION_PLUGIN } from '../../nf-core/utils_nfvalidation_plugin' -include { paramsSummaryMap } from 'plugin/nf-validation' -include { fromSamplesheet } from 'plugin/nf-validation' -include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline' +include { UTILS_NFSCHEMA_PLUGIN } from '../../nf-core/utils_nfschema_plugin' +include { paramsSummaryMap } from 'plugin/nf-schema' +include { samplesheetToList } from 'plugin/nf-schema' include { completionEmail } from '../../nf-core/utils_nfcore_pipeline' include { completionSummary } from '../../nf-core/utils_nfcore_pipeline' -include { dashedLine } from '../../nf-core/utils_nfcore_pipeline' -include { nfCoreLogo } from '../../nf-core/utils_nfcore_pipeline' include { imNotification } from '../../nf-core/utils_nfcore_pipeline' include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline' -include { workflowCitation } from '../../nf-core/utils_nfcore_pipeline' +include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline' /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SUBWORKFLOW TO INITIALISE PIPELINE -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ workflow PIPELINE_INITIALISATION { take: version // boolean: Display version and exit - help // boolean: Display help text validate_params // boolean: Boolean whether to validate parameters against the schema at runtime monochrome_logs // boolean: Do not use coloured log outputs nextflow_cli_args // array: List of positional nextflow CLI args @@ -54,16 +50,10 @@ workflow PIPELINE_INITIALISATION { // // Validate parameters and generate parameter summary to stdout // - pre_help_text = nfCoreLogo(monochrome_logs) - post_help_text = '\n' + workflowCitation() + '\n' + dashedLine(monochrome_logs) - def String workflow_command = "nextflow run ${workflow.manifest.name} -profile --input samplesheet.csv --outdir " - UTILS_NFVALIDATION_PLUGIN ( - help, - workflow_command, - pre_help_text, - post_help_text, + UTILS_NFSCHEMA_PLUGIN ( + workflow, validate_params, - "nextflow_schema.json" + null ) // @@ -72,6 +62,7 @@ workflow PIPELINE_INITIALISATION { UTILS_NFCORE_PIPELINE ( nextflow_cli_args ) + // // Custom 
validation for pipeline parameters // @@ -80,8 +71,9 @@ workflow PIPELINE_INITIALISATION { // // Create channel from input file provided through params.input // + Channel - .fromSamplesheet("input") + .fromList(samplesheetToList(params.input, "${projectDir}/assets/schema_input.json")) .map { meta, maf, facets_gene, hla_file -> [meta, maf, facets_gene, hla_file] @@ -99,9 +91,9 @@ workflow PIPELINE_INITIALISATION { } /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SUBWORKFLOW FOR PIPELINE COMPLETION -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ workflow PIPELINE_COMPLETION { @@ -116,7 +108,6 @@ workflow PIPELINE_COMPLETION { multiqc_report // string: Path to MultiQC report main: - summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") // @@ -124,21 +115,32 @@ workflow PIPELINE_COMPLETION { // workflow.onComplete { if (email || email_on_fail) { - completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs, multiqc_report.toList()) + completionEmail( + summary_params, + email, + email_on_fail, + plaintext_email, + outdir, + monochrome_logs, + multiqc_report.toList() + ) } completionSummary(monochrome_logs) - if (hook_url) { imNotification(summary_params, hook_url) } } + + workflow.onError { + log.error "Pipeline failed. Please refer to troubleshooting docs: https://nf-co.re/docs/usage/troubleshooting" + } } /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FUNCTIONS -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ // // Check and validate pipeline parameters @@ -154,7 +156,7 @@ def validateInputSamplesheet(input) { def (metas, fastqs) = input[1..2] // Check that multiple runs of the same sample are of the same datatype i.e. single-end / paired-end - def endedness_ok = metas.collect{ it.single_end }.unique().size == 1 + def endedness_ok = metas.collect{ meta -> meta.single_end }.unique().size == 1 if (!endedness_ok) { error("Please check input samplesheet -> Multiple runs of a sample must be of the same datatype i.e. single-end or paired-end: ${metas[0].id}") } @@ -186,7 +188,6 @@ def genomeExistsError() { error(error_string) } } - // // Generate methods description for MultiQC // @@ -223,8 +224,18 @@ def methodsDescriptionText(mqc_methods_yaml) { meta["manifest_map"] = workflow.manifest.toMap() // Pipeline DOI - meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : "" - meta["nodoi_text"] = meta.manifest_map.doi ? "": "
<li>If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used. </li>" + if (meta.manifest_map.doi) { + // Using a loop to handle multiple DOIs + // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers + // Removing ` ` since the manifest.doi is a string and not a proper list + def temp_doi_ref = "" + def manifest_doi = meta.manifest_map.doi.tokenize(",") + manifest_doi.each { doi_ref -> + temp_doi_ref += "(doi: ${doi_ref.replace("https://doi.org/", "").replace(" ", "")}), " + } + meta["doi_text"] = temp_doi_ref.substring(0, temp_doi_ref.length() - 2) + } else meta["doi_text"] = "" + meta["nodoi_text"] = meta.manifest_map.doi ? "" : "<li>If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used. </li>" // Tool references meta["tool_citations"] = "" @@ -242,3 +253,4 @@ def methodsDescriptionText(mqc_methods_yaml) { return description_html.toString() } + diff --git a/subworkflows/msk/netmhcstabandpan/main.nf b/subworkflows/msk/netmhcstabandpan/main.nf index ea8247c..980995e 100644 --- a/subworkflows/msk/netmhcstabandpan/main.nf +++ b/subworkflows/msk/netmhcstabandpan/main.nf @@ -1,6 +1,7 @@ include { NEOANTIGENUTILS_GENERATEHLASTRING } from '../../../modules/msk/neoantigenutils/generatehlastring/main' include { NEOANTIGENUTILS_GENERATEMUTFASTA } from '../../../modules/msk/neoantigenutils/generatemutfasta/main' -include { NETMHCPAN } from '../../../modules/msk/netmhcpan/main' +include { NETMHCPAN4 } from '../../../modules/msk/netmhcpan4/main' +include { NETMHC3 } from '../../../modules/msk/netmhc3/main' include { NETMHCSTABPAN } from '../../../modules/msk/netmhcstabpan/main' include { NEOANTIGENUTILS_FORMATNETMHCPAN } from '../../../modules/msk/neoantigenutils/formatnetmhcpan/main' @@ -10,6 +11,7 @@ workflow NETMHCSTABANDPAN { ch_maf_and_hla // channel: [ val(meta), maf, hla ] ch_cds_and_cdna // channel: [ cfs, cdna] + ch_neosv_out main: @@ -38,57 +40,83 @@ workflow NETMHCSTABANDPAN { ch_netmhcinput = createNETMHCInput(NEOANTIGENUTILS_GENERATEMUTFASTA.out.wt_fasta, NEOANTIGENUTILS_GENERATEMUTFASTA.out.mut_fasta, - NEOANTIGENUTILS_GENERATEHLASTRING.out.hlastring + NEOANTIGENUTILS_GENERATEHLASTRING.out.hlastring, + ch_neosv_out ) - - - NETMHCPAN( ch_netmhcinput ) - - ch_versions = ch_versions.mix(NETMHCPAN.out.versions) - + NETMHCSTABPAN( ch_netmhcinput ) ch_versions = ch_versions.mix(NETMHCSTABPAN.out.versions) - merged_pan_and_stab = NETMHCPAN.out.netmhcpanoutput.mix(NETMHCSTABPAN.out.netmhcstabpanoutput) + merged_pan_and_stab = Channel.empty() - NEOANTIGENUTILS_FORMATNETMHCPAN( merged_pan_and_stab ) + if ( params.netmhc3 ) { + + NETMHC3( ch_netmhcinput ) + ch_versions = ch_versions.mix(NETMHC3.out.versions) + merged_pan_and_stab = NETMHC3.out.netmhcoutput.mix(NETMHCSTABPAN.out.netmhcstabpanoutput) + } + else{ - ch_versions = ch_versions.mix( NEOANTIGENUTILS_FORMATNETMHCPAN.out.versions ) + NETMHCPAN4( ch_netmhcinput ) + ch_versions = ch_versions.mix(NETMHCPAN4.out.versions) + merged_pan_and_stab = NETMHCPAN4.out.netmhcpanoutput.mix(NETMHCSTABPAN.out.netmhcstabpanoutput) + } + NEOANTIGENUTILS_FORMATNETMHCPAN( merged_pan_and_stab ) + ch_versions = ch_versions.mix( NEOANTIGENUTILS_FORMATNETMHCPAN.out.versions ) emit: tsv = NEOANTIGENUTILS_FORMATNETMHCPAN.out.netMHCpanreformatted // channel: [ val(meta), [ tsv ] ] - xls = NETMHCPAN.out.xls // channel: [ val(meta), [ xls ] ] + //xls = NETMHCPAN.out.xls // channel: [ val(meta), [ xls ] ] mut_fasta = NEOANTIGENUTILS_GENERATEMUTFASTA.out.mut_fasta // channel: [ val(meta), [ *.MUT_sequences.fa ] ] wt_fasta = NEOANTIGENUTILS_GENERATEMUTFASTA.out.wt_fasta // channel: [ val(meta), [ *.WT_sequences.fa ] ] versions = ch_versions // channel: [ versions.yml ] } -def createNETMHCInput(wt_fasta, mut_fasta, hla) { +def createNETMHCInput(wt_fasta, mut_fasta, hla, sv_fastas) { mut_fasta_channel = mut_fasta .map{ - new Tuple(it[0].id,it) + new Tuple(it[0],it) } + wt_fasta_channel = wt_fasta .map{ - new Tuple(it[0].id,it) + new Tuple(it[0],it) } + + mut_SVfasta_channel = sv_fastas + .map{ + new Tuple(it[0],it[1]) + } + + wt_SVfasta_channel = sv_fastas + .map{ + new Tuple(it[0],it[2]) + } + hla_channel = hla .map{ - new Tuple(it[0].id,it) + new Tuple(it[0],it[1]) } - merged_mut = mut_fasta_channel - .join(hla_channel) + + merged_mut_fasta = 
mut_fasta_channel + .join(mut_SVfasta_channel, by:0) + + merged_mut = merged_mut_fasta.join(hla_channel) .map{ - new Tuple(it[1][0], it[1][1],it[2][1],"MUT") + new Tuple(it[1][0], it[1][1], it[2], it[3],"MUT") } - merged_wt = wt_fasta_channel + + merged_wt_fasta = wt_fasta_channel + .join(wt_SVfasta_channel,by: 0) + + merged_wt = merged_wt_fasta .join(hla_channel) .map{ - new Tuple(it[1][0], it[1][1],it[2][1],"WT") + new Tuple(it[1][0], it[1][1], it[2], it[3],"WT") } merged = merged_mut.mix(merged_wt) return merged diff --git a/subworkflows/msk/netmhcstabandpan/meta.yml b/subworkflows/msk/netmhcstabandpan/meta.yml index 5de706c..0fb68cc 100644 --- a/subworkflows/msk/netmhcstabandpan/meta.yml +++ b/subworkflows/msk/netmhcstabandpan/meta.yml @@ -9,7 +9,8 @@ keywords: components: - neoantigenutils/generatehlastring - neoantigenutils/generatemutfasta - - netmhcpan + - netmhc3 + - netmhcpan4 - netmhcstabpan - neoantigenutils/formatnetmhcpan input: diff --git a/subworkflows/msk/netmhcstabandpan/tests/main.nf.test b/subworkflows/msk/netmhcstabandpan/tests/main.nf.test index 74c750e..8758887 100644 --- a/subworkflows/msk/netmhcstabandpan/tests/main.nf.test +++ b/subworkflows/msk/netmhcstabandpan/tests/main.nf.test @@ -11,10 +11,89 @@ nextflow_workflow { tag "netmhcstabandpan" tag "neoantigenutils/generatehlastring" tag "neoantigenutils/generatemutfasta" - tag "netmhcpan" + tag "netmhc3" + tag "netmhcpan4" tag "netmhcstabpan" tag "neoantigenutils/formatnetmhcpan" + test("netmhcstabandpan - SV - tsv,xls,fa") { + + when { + workflow { + """ + input[0] = Channel.value([ + [ id:'test', single_end:false ], // meta map + file(params.test_data_mskcc['neoantigen']['temp_test_short_maf'], checkIfExists: true), + file(params.test_data_mskcc['neoantigen']['winners_hla_txt'], checkIfExists: true), + ]) + + input[1] = Channel.value([ + file(params.test_data_mskcc['neoantigen']['cds'], checkIfExists: true), + file(params.test_data_mskcc['neoantigen']['cdna'], checkIfExists: true) + ]) + + input[2] = Channel.value([ + [ id:'test', single_end:false ], // meta map + file(params.test_data_mskcc['neoantigen']['svfa'], checkIfExists: true), + file(params.test_data_mskcc['neoantigen']['wtsvfa'], checkIfExists: true) + ]) + + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(file(workflow.out.tsv[0][1]).name, + workflow.out.mut_fasta[0][1], + workflow.out.wt_fasta[0][1] + ).match() + } + ) + } + } + + test("netmhcstabandnetmhc3 - SV - tsv,xls,fa") { + + config "./nextflow.config" + + when { + workflow { + """ + input[0] = Channel.value([ + [ id:'test', single_end:false ], // meta map + file(params.test_data_mskcc['neoantigen']['temp_test_short_maf'], checkIfExists: true), + file(params.test_data_mskcc['neoantigen']['winners_hla_txt'], checkIfExists: true), + ]) + + input[1] = Channel.value([ + file(params.test_data_mskcc['neoantigen']['cds'], checkIfExists: true), + file(params.test_data_mskcc['neoantigen']['cdna'], checkIfExists: true) + ]) + + input[2] = Channel.value([ + [ id:'test', single_end:false ], // meta map + file(params.test_data_mskcc['neoantigen']['svfa'], checkIfExists: true), + file(params.test_data_mskcc['neoantigen']['wtsvfa'], checkIfExists: true) + ]) + + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(file(workflow.out.tsv[0][1]).name, + workflow.out.mut_fasta[0][1], + workflow.out.wt_fasta[0][1] + ).match() + } + ) + } + } + test("netmhcstabandpan - tsv,xls,fa") { when { @@ -25,11 +104,58 @@ nextflow_workflow { 
file(params.test_data_mskcc['neoantigen']['temp_test_short_maf'], checkIfExists: true), file(params.test_data_mskcc['neoantigen']['winners_hla_txt'], checkIfExists: true), ]) + + input[1] = Channel.value([ + file(params.test_data_mskcc['neoantigen']['cds'], checkIfExists: true), + file(params.test_data_mskcc['neoantigen']['cdna'], checkIfExists: true) + ]) + + input[2] = Channel.value([ + [ id:'test', single_end:false ], // meta map + [], + [] + ]) + + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(file(workflow.out.tsv[0][1]).name, + workflow.out.mut_fasta[0][1], + workflow.out.wt_fasta[0][1] + ).match() + } + ) + } + } + + test("netmhcstabandnetmhc3 - tsv,xls,fa") { + + config "./nextflow.config" + + when { + workflow { + """ + input[0] = Channel.value([ + [ id:'test', single_end:false ], // meta map + file(params.test_data_mskcc['neoantigen']['temp_test_short_maf'], checkIfExists: true), + file(params.test_data_mskcc['neoantigen']['winners_hla_txt'], checkIfExists: true), + ]) + input[1] = Channel.value([ file(params.test_data_mskcc['neoantigen']['cds'], checkIfExists: true), file(params.test_data_mskcc['neoantigen']['cdna'], checkIfExists: true) ]) + input[2] = Channel.value([ + [ id:'test', single_end:false ], // meta map + [], + [] + ]) + """ } } @@ -37,8 +163,7 @@ nextflow_workflow { then { assertAll( { assert workflow.success}, - { assert snapshot(workflow.out.tsv[0][1], - file(workflow.out.xls[0][1]).name, + { assert snapshot(file(workflow.out.tsv[0][1]).name, workflow.out.mut_fasta[0][1], workflow.out.wt_fasta[0][1] ).match() @@ -57,12 +182,19 @@ nextflow_workflow { input[0] = Channel.value([ [ id:'test', single_end:false ], // meta map file('temp_test_maf'), - file('winners_hla_txt'), + file('winners_hla_txt') ]) + input[1] = Channel.value([ file('cds'), file('cdna') ]) + + input[2] = Channel.value([ + [ id:'test', single_end:false ], // meta map + file('svMfa'), + file('svWfa') + ]) """ } } @@ -70,8 +202,7 @@ nextflow_workflow { then { assertAll( { assert workflow.success}, - { assert snapshot(workflow.out.tsv[0][1], - file(workflow.out.xls[0][1]).name, + { assert snapshot(file(workflow.out.tsv[0][1][1]).name, workflow.out.mut_fasta[0][1], workflow.out.wt_fasta[0][1] ).match() diff --git a/subworkflows/msk/netmhcstabandpan/tests/main.nf.test.snap b/subworkflows/msk/netmhcstabandpan/tests/main.nf.test.snap index dbde65e..b77b94a 100644 --- a/subworkflows/msk/netmhcstabandpan/tests/main.nf.test.snap +++ b/subworkflows/msk/netmhcstabandpan/tests/main.nf.test.snap @@ -1,20 +1,62 @@ { "netmhcstabandpan - tsv,xls,fa": { "content": [ - "test_netmHCpanoutput.WT.tsv:md5,a1d7db1b6f116e96457f2fa60660558e", - "test.WT.xls", + "test_netmhc.output.WT.tsv", "test.MUT_sequences.fa:md5,7fdb7d3f0fe5a6f439ed294b612c2d70", "test.WT_sequences.fa:md5,7595ed6cf0c98500b00c9ad027125b38" ], - "timestamp": "2024-07-30T13:48:55.729458" + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-12-02T17:16:20.890584183" }, "netmhcstabandpan - tsv,xls,fa - stub": { "content": [ - "test.WT.PAN.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", - "test.MUT.xls", + "h", "test.MUT_sequences.fa:md5,d41d8cd98f00b204e9800998ecf8427e", "test.WT_sequences.fa:md5,d41d8cd98f00b204e9800998ecf8427e" ], - "timestamp": "2024-07-30T13:49:11.413783" + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-12-02T17:17:10.765080369" + }, + "netmhcstabandnetmhc3 - SV - tsv,xls,fa": { + "content": [ + "test_netmhc.output.WT.tsv", + 
"test.MUT_sequences.fa:md5,118b48df96c7217675b9f9ac14309a25", + "test.WT_sequences.fa:md5,bb7dfff23ae47cf64ec4854ee48ec78d" + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-12-02T17:15:41.011763441" + }, + "netmhcstabandpan - SV - tsv,xls,fa": { + "content": [ + "test_netmhc.output.WT.tsv", + "test.MUT_sequences.fa:md5,118b48df96c7217675b9f9ac14309a25", + "test.WT_sequences.fa:md5,bb7dfff23ae47cf64ec4854ee48ec78d" + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-12-02T17:14:23.137692552" + }, + "netmhcstabandnetmhc3 - tsv,xls,fa": { + "content": [ + "test_netmhc.output.WT.tsv", + "test.MUT_sequences.fa:md5,7fdb7d3f0fe5a6f439ed294b612c2d70", + "test.WT_sequences.fa:md5,7595ed6cf0c98500b00c9ad027125b38" + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-12-02T17:17:01.585453593" } } \ No newline at end of file diff --git a/subworkflows/msk/netmhcstabandpan/tests/nextflow.config b/subworkflows/msk/netmhcstabandpan/tests/nextflow.config new file mode 100644 index 0000000..1b341c3 --- /dev/null +++ b/subworkflows/msk/netmhcstabandpan/tests/nextflow.config @@ -0,0 +1,3 @@ +params { + netmhc3 = true +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/main.nf b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf index ac31f28..0fcbf7b 100644 --- a/subworkflows/nf-core/utils_nextflow_pipeline/main.nf +++ b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf @@ -2,18 +2,13 @@ // Subworkflow with functionality that may be useful for any Nextflow pipeline // -import org.yaml.snakeyaml.Yaml -import groovy.json.JsonOutput -import nextflow.extension.FilesEx - /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SUBWORKFLOW DEFINITION -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ workflow UTILS_NEXTFLOW_PIPELINE { - take: print_version // boolean: print version dump_parameters // boolean: dump parameters @@ -26,7 +21,7 @@ workflow UTILS_NEXTFLOW_PIPELINE { // Print workflow version and exit on --version // if (print_version) { - log.info "${workflow.manifest.name} ${getWorkflowVersion()}" + log.info("${workflow.manifest.name} ${getWorkflowVersion()}") System.exit(0) } @@ -49,16 +44,16 @@ workflow UTILS_NEXTFLOW_PIPELINE { } /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FUNCTIONS -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ // // Generate version string // def getWorkflowVersion() { - String version_string = "" + def version_string = "" as String if (workflow.manifest.version) { def prefix_v = workflow.manifest.version[0] != 'v' ? 
'v' : '' version_string += "${prefix_v}${workflow.manifest.version}" @@ -76,13 +71,13 @@ def getWorkflowVersion() { // Dump pipeline parameters to a JSON file // def dumpParametersToJSON(outdir) { - def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') - def filename = "params_${timestamp}.json" - def temp_pf = new File(workflow.launchDir.toString(), ".${filename}") - def jsonStr = JsonOutput.toJson(params) - temp_pf.text = JsonOutput.prettyPrint(jsonStr) + def timestamp = new java.util.Date().format('yyyy-MM-dd_HH-mm-ss') + def filename = "params_${timestamp}.json" + def temp_pf = new File(workflow.launchDir.toString(), ".${filename}") + def jsonStr = groovy.json.JsonOutput.toJson(params) + temp_pf.text = groovy.json.JsonOutput.prettyPrint(jsonStr) - FilesEx.copyTo(temp_pf.toPath(), "${outdir}/pipeline_info/params_${timestamp}.json") + nextflow.extension.FilesEx.copyTo(temp_pf.toPath(), "${outdir}/pipeline_info/params_${timestamp}.json") temp_pf.delete() } @@ -90,37 +85,40 @@ def dumpParametersToJSON(outdir) { // When running with -profile conda, warn if channels have not been set-up appropriately // def checkCondaChannels() { - Yaml parser = new Yaml() + def parser = new org.yaml.snakeyaml.Yaml() def channels = [] try { def config = parser.load("conda config --show channels".execute().text) channels = config.channels - } catch(NullPointerException | IOException e) { - log.warn "Could not verify conda channel configuration." - return + } + catch (NullPointerException e) { + log.warn("Could not verify conda channel configuration.") + return null + } + catch (IOException e) { + log.warn("Could not verify conda channel configuration.") + return null } // Check that all channels are present // This channel list is ordered by required channel priority. - def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults'] + def required_channels_in_order = ['conda-forge', 'bioconda'] def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean // Check that they are in the right order - def channel_priority_violation = false - def n = required_channels_in_order.size() - for (int i = 0; i < n - 1; i++) { - channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1])) - } + def channel_priority_violation = required_channels_in_order != channels.findAll { ch -> ch in required_channels_in_order } if (channels_missing | channel_priority_violation) { - log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + - " There is a problem with your Conda configuration!\n\n" + - " You will need to set-up the conda-forge and bioconda channels correctly.\n" + - " Please refer to https://bioconda.github.io/\n" + - " The observed channel order is \n" + - " ${channels}\n" + - " but the following channel order is required:\n" + - " ${required_channels_in_order}\n" + - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + log.warn """\ + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + There is a problem with your Conda configuration! + You will need to set-up the conda-forge and bioconda channels correctly. 
+ Please refer to https://bioconda.github.io/ + The observed channel order is + ${channels} + but the following channel order is required: + ${required_channels_in_order} + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + """.stripIndent(true) } } diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test index ca964ce..02dbf09 100644 --- a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test @@ -52,10 +52,12 @@ nextflow_workflow { } then { - assertAll( - { assert workflow.success }, - { assert workflow.stdout.contains("nextflow_workflow v9.9.9") } - ) + expect { + with(workflow) { + assert success + assert "nextflow_workflow v9.9.9" in stdout + } + } } } diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config index d0a926b..a09572e 100644 --- a/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config @@ -3,7 +3,7 @@ manifest { author = """nf-core""" homePage = 'https://127.0.0.1' description = """Dummy pipeline""" - nextflowVersion = '!>=23.04.0' + nextflowVersion = '!>=23.04.0' version = '9.9.9' doi = 'https://doi.org/10.5281/zenodo.5070524' } diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf index 14558c3..5cb7baf 100644 --- a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf +++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -2,17 +2,13 @@ // Subworkflow with utility functions specific to the nf-core pipeline template // -import org.yaml.snakeyaml.Yaml -import nextflow.extension.FilesEx - /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SUBWORKFLOW DEFINITION -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ workflow UTILS_NFCORE_PIPELINE { - take: nextflow_cli_args @@ -25,23 +21,20 @@ workflow UTILS_NFCORE_PIPELINE { } /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FUNCTIONS -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ // // Warn if a -profile or Nextflow config has not been provided to run the pipeline // def checkConfigProvided() { - valid_config = true + def valid_config = true as Boolean if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { - log.warn "[$workflow.manifest.name] You are attempting to run the pipeline without any custom configuration!\n\n" + - "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + - " (1) Using an existing pipeline profile e.g. `-profile docker` or `-profile singularity`\n" + - " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + - " (3) Using your own local custom config e.g. 
`-c /path/to/your/custom.config`\n\n" + - "Please refer to the quick start section and usage docs for the pipeline.\n " + log.warn( + "[${workflow.manifest.name}] You are attempting to run the pipeline without any custom configuration!\n\n" + "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + " (1) Using an existing pipeline profile e.g. `-profile docker` or `-profile singularity`\n" + " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + " (3) Using your own local custom config e.g. `-c /path/to/your/custom.config`\n\n" + "Please refer to the quick start section and usage docs for the pipeline.\n " + ) valid_config = false } return valid_config @@ -52,12 +45,14 @@ def checkConfigProvided() { // def checkProfileProvided(nextflow_cli_args) { if (workflow.profile.endsWith(',')) { - error "The `-profile` option cannot end with a trailing comma, please remove it and re-run the pipeline!\n" + - "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n" + error( + "The `-profile` option cannot end with a trailing comma, please remove it and re-run the pipeline!\n" + "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n" + ) } if (nextflow_cli_args[0]) { - log.warn "nf-core pipelines do not accept positional arguments. The positional argument `${nextflow_cli_args[0]}` has been detected.\n" + - "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n" + log.warn( + "nf-core pipelines do not accept positional arguments. The positional argument `${nextflow_cli_args[0]}` has been detected.\n" + "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n" + ) } } @@ -66,25 +61,21 @@ def checkProfileProvided(nextflow_cli_args) { // def workflowCitation() { def temp_doi_ref = "" - String[] manifest_doi = workflow.manifest.doi.tokenize(",") - // Using a loop to handle multiple DOIs + def manifest_doi = workflow.manifest.doi.tokenize(",") + // Handling multiple DOIs // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers // Removing ` ` since the manifest.doi is a string and not a proper list - for (String doi_ref: manifest_doi) temp_doi_ref += " https://doi.org/${doi_ref.replace('https://doi.org/', '').replace(' ', '')}\n" - return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + - "* The pipeline\n" + - temp_doi_ref + "\n" + - "* The nf-core framework\n" + - " https://doi.org/10.1038/s41587-020-0439-x\n\n" + - "* Software dependencies\n" + - " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" + manifest_doi.each { doi_ref -> + temp_doi_ref += " https://doi.org/${doi_ref.replace('https://doi.org/', '').replace(' ', '')}\n" + } + return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + "* The pipeline\n" + temp_doi_ref + "\n" + "* The nf-core framework\n" + " https://doi.org/10.1038/s41587-020-0439-x\n\n" + "* Software dependencies\n" + " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" } // // Generate workflow version string // def getWorkflowVersion() { - String version_string = "" + def version_string = "" as String if (workflow.manifest.version) { def prefix_v = workflow.manifest.version[0] != 'v' ? 
'v' : '' version_string += "${prefix_v}${workflow.manifest.version}" @@ -102,8 +93,8 @@ def getWorkflowVersion() { // Get software versions for pipeline // def processVersionsFromYAML(yaml_file) { - Yaml yaml = new Yaml() - versions = yaml.load(yaml_file).collectEntries { k, v -> [ k.tokenize(':')[-1], v ] } + def yaml = new org.yaml.snakeyaml.Yaml() + def versions = yaml.load(yaml_file).collectEntries { k, v -> [k.tokenize(':')[-1], v] } return yaml.dumpAsMap(versions).trim() } @@ -113,8 +104,8 @@ def processVersionsFromYAML(yaml_file) { def workflowVersionToYAML() { return """ Workflow: - $workflow.manifest.name: ${getWorkflowVersion()} - Nextflow: $workflow.nextflow.version + ${workflow.manifest.name}: ${getWorkflowVersion()} + Nextflow: ${workflow.nextflow.version} """.stripIndent().trim() } @@ -122,11 +113,7 @@ def workflowVersionToYAML() { // Get channel of software versions used in pipeline in YAML format // def softwareVersionsToYAML(ch_versions) { - return ch_versions - .unique() - .map { processVersionsFromYAML(it) } - .unique() - .mix(Channel.of(workflowVersionToYAML())) + return ch_versions.unique().map { version -> processVersionsFromYAML(version) }.unique().mix(Channel.of(workflowVersionToYAML())) } // @@ -134,25 +121,31 @@ def softwareVersionsToYAML(ch_versions) { // def paramsSummaryMultiqc(summary_params) { def summary_section = '' - for (group in summary_params.keySet()) { - def group_params = summary_params.get(group) // This gets the parameters of that particular group - if (group_params) { - summary_section += "

<p style=\"font-size:110%\"><b>$group</b></p>\n" - summary_section += "    <dl class=\"dl-horizontal\">\n" - for (param in group_params.keySet()) { - summary_section += "        <dt>$param</dt><dd><samp>${group_params.get(param) ?: '<span style=\"color:#999999;\">N/A</a>'}</samp></dd>\n" + summary_params + .keySet() + .each { group -> + def group_params = summary_params.get(group) + // This gets the parameters of that particular group + if (group_params) { + summary_section += "    <p style=\"font-size:110%\"><b>${group}</b></p>\n" + summary_section += "    <dl class=\"dl-horizontal\">\n" + group_params + .keySet() + .sort() + .each { param -> + summary_section += "        <dt>${param}</dt><dd><samp>${group_params.get(param) ?: '<span style=\"color:#999999;\">N/A</a>'}</samp></dd>\n" + } + summary_section += "    </dl>\n" } - summary_section += "    </dl>
    \n" } - } - String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n" - yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" - yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" - yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" - yaml_file_text += "plot_type: 'html'\n" - yaml_file_text += "data: |\n" - yaml_file_text += "${summary_section}" + def yaml_file_text = "id: '${workflow.manifest.name.replace('/', '-')}-summary'\n" as String + yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" + yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" + yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" + yaml_file_text += "plot_type: 'html'\n" + yaml_file_text += "data: |\n" + yaml_file_text += "${summary_section}" return yaml_file_text } @@ -161,7 +154,7 @@ def paramsSummaryMultiqc(summary_params) { // nf-core logo // def nfCoreLogo(monochrome_logs=true) { - Map colors = logColours(monochrome_logs) + def colors = logColours(monochrome_logs) as Map String.format( """\n ${dashedLine(monochrome_logs)} @@ -180,7 +173,7 @@ def nfCoreLogo(monochrome_logs=true) { // Return dashed line // def dashedLine(monochrome_logs=true) { - Map colors = logColours(monochrome_logs) + def colors = logColours(monochrome_logs) as Map return "-${colors.dim}----------------------------------------------------${colors.reset}-" } @@ -188,7 +181,7 @@ def dashedLine(monochrome_logs=true) { // ANSII colours used for terminal logging // def logColours(monochrome_logs=true) { - Map colorcodes = [:] + def colorcodes = [:] as Map // Reset / Meta colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" @@ -200,54 +193,54 @@ def logColours(monochrome_logs=true) { colorcodes['hidden'] = monochrome_logs ? '' : "\033[8m" // Regular Colors - colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" - colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" - colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" - colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" - colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" - colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" - colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" - colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" + colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" + colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" + colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" + colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" + colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" + colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" + colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" + colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" // Bold - colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" - colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" - colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" - colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" - colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" - colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" - colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" - colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" + colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" + colorcodes['bred'] = monochrome_logs ? 
'' : "\033[1;31m" + colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" + colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" + colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" + colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" + colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" + colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" // Underline - colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" - colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" - colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" - colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" - colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" - colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" - colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" - colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" + colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" + colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" + colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" + colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" + colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" + colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" + colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" + colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" // High Intensity - colorcodes['iblack'] = monochrome_logs ? '' : "\033[0;90m" - colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" - colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" - colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" - colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" - colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" - colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" - colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" + colorcodes['iblack'] = monochrome_logs ? '' : "\033[0;90m" + colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" + colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" + colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" + colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" + colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" + colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" + colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" // Bold High Intensity - colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" - colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" - colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" - colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" - colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" - colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" - colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" - colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m" + colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" + colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" + colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" + colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" + colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" + colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" + colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" + colorcodes['biwhite'] = monochrome_logs ? 
'' : "\033[1;97m" return colorcodes } @@ -262,14 +255,15 @@ def attachMultiqcReport(multiqc_report) { mqc_report = multiqc_report.getVal() if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) { if (mqc_report.size() > 1) { - log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one" + log.warn("[${workflow.manifest.name}] Found multiple reports from process 'MULTIQC', will use only one") } mqc_report = mqc_report[0] } } - } catch (all) { + } + catch (Exception all) { if (multiqc_report) { - log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email" + log.warn("[${workflow.manifest.name}] Could not attach MultiQC report to summary email") } } return mqc_report @@ -281,26 +275,35 @@ def attachMultiqcReport(multiqc_report) { def completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs=true, multiqc_report=null) { // Set up the e-mail variables - def subject = "[$workflow.manifest.name] Successful: $workflow.runName" + def subject = "[${workflow.manifest.name}] Successful: ${workflow.runName}" if (!workflow.success) { - subject = "[$workflow.manifest.name] FAILED: $workflow.runName" + subject = "[${workflow.manifest.name}] FAILED: ${workflow.runName}" } def summary = [:] - for (group in summary_params.keySet()) { - summary << summary_params[group] - } + summary_params + .keySet() + .sort() + .each { group -> + summary << summary_params[group] + } def misc_fields = [:] misc_fields['Date Started'] = workflow.start misc_fields['Date Completed'] = workflow.complete misc_fields['Pipeline script file path'] = workflow.scriptFile misc_fields['Pipeline script hash ID'] = workflow.scriptId - if (workflow.repository) misc_fields['Pipeline repository Git URL'] = workflow.repository - if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId - if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision - misc_fields['Nextflow Version'] = workflow.nextflow.version - misc_fields['Nextflow Build'] = workflow.nextflow.build + if (workflow.repository) { + misc_fields['Pipeline repository Git URL'] = workflow.repository + } + if (workflow.commitId) { + misc_fields['Pipeline repository Git Commit'] = workflow.commitId + } + if (workflow.revision) { + misc_fields['Pipeline Git branch/tag'] = workflow.revision + } + misc_fields['Nextflow Version'] = workflow.nextflow.version + misc_fields['Nextflow Build'] = workflow.nextflow.build misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp def email_fields = [:] @@ -338,39 +341,41 @@ def completionEmail(summary_params, email, email_on_fail, plaintext_email, outdi // Render the sendmail template def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? 
params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit - def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "${workflow.projectDir}", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] + def smail_fields = [email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "${workflow.projectDir}", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes()] def sf = new File("${workflow.projectDir}/assets/sendmail_template.txt") def sendmail_template = engine.createTemplate(sf).make(smail_fields) def sendmail_html = sendmail_template.toString() // Send the HTML e-mail - Map colors = logColours(monochrome_logs) + def colors = logColours(monochrome_logs) as Map if (email_address) { try { - if (plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } + if (plaintext_email) { throw new org.codehaus.groovy.GroovyException('Send plaintext e-mail, not HTML') } // Try to send HTML e-mail using sendmail def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html") sendmail_tf.withWriter { w -> w << sendmail_html } - [ 'sendmail', '-t' ].execute() << sendmail_html - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" - } catch (all) { + ['sendmail', '-t'].execute() << sendmail_html + log.info("-${colors.purple}[${workflow.manifest.name}]${colors.green} Sent summary e-mail to ${email_address} (sendmail)-") + } + catch (Exception all) { // Catch failures and try with plaintext - def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] + def mail_cmd = ['mail', '-s', subject, '--content-type=text/html', email_address] mail_cmd.execute() << email_html - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-" + log.info("-${colors.purple}[${workflow.manifest.name}]${colors.green} Sent summary e-mail to ${email_address} (mail)-") } } // Write summary e-mail HTML to a file def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html") output_hf.withWriter { w -> w << email_html } - FilesEx.copyTo(output_hf.toPath(), "${outdir}/pipeline_info/pipeline_report.html"); + nextflow.extension.FilesEx.copyTo(output_hf.toPath(), "${outdir}/pipeline_info/pipeline_report.html") output_hf.delete() // Write summary e-mail TXT to a file def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt") output_tf.withWriter { w -> w << email_txt } - FilesEx.copyTo(output_tf.toPath(), "${outdir}/pipeline_info/pipeline_report.txt"); + nextflow.extension.FilesEx.copyTo(output_tf.toPath(), "${outdir}/pipeline_info/pipeline_report.txt") output_tf.delete() } @@ -378,15 +383,17 @@ def completionEmail(summary_params, email, email_on_fail, plaintext_email, outdi // // Print pipeline summary on completion // def completionSummary(monochrome_logs=true) { - Map colors = logColours(monochrome_logs) + def colors = logColours(monochrome_logs) as Map if (workflow.success) { if (workflow.stats.ignoredCount == 0) { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" - } else { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" + log.info("-${colors.purple}[${workflow.manifest.name}]${colors.green} Pipeline completed 
successfully${colors.reset}-") + } + else { + log.info("-${colors.purple}[${workflow.manifest.name}]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-") } - } else { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" + } + else { + log.info("-${colors.purple}[${workflow.manifest.name}]${colors.red} Pipeline completed with errors${colors.reset}-") } } @@ -395,21 +402,30 @@ def completionSummary(monochrome_logs=true) { // def imNotification(summary_params, hook_url) { def summary = [:] - for (group in summary_params.keySet()) { - summary << summary_params[group] - } + summary_params + .keySet() + .sort() + .each { group -> + summary << summary_params[group] + } def misc_fields = [:] - misc_fields['start'] = workflow.start - misc_fields['complete'] = workflow.complete - misc_fields['scriptfile'] = workflow.scriptFile - misc_fields['scriptid'] = workflow.scriptId - if (workflow.repository) misc_fields['repository'] = workflow.repository - if (workflow.commitId) misc_fields['commitid'] = workflow.commitId - if (workflow.revision) misc_fields['revision'] = workflow.revision - misc_fields['nxf_version'] = workflow.nextflow.version - misc_fields['nxf_build'] = workflow.nextflow.build - misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp + misc_fields['start'] = workflow.start + misc_fields['complete'] = workflow.complete + misc_fields['scriptfile'] = workflow.scriptFile + misc_fields['scriptid'] = workflow.scriptId + if (workflow.repository) { + misc_fields['repository'] = workflow.repository + } + if (workflow.commitId) { + misc_fields['commitid'] = workflow.commitId + } + if (workflow.revision) { + misc_fields['revision'] = workflow.revision + } + misc_fields['nxf_version'] = workflow.nextflow.version + misc_fields['nxf_build'] = workflow.nextflow.build + misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp def msg_fields = [:] msg_fields['version'] = getWorkflowVersion() @@ -434,13 +450,13 @@ def imNotification(summary_params, hook_url) { def json_message = json_template.toString() // POST - def post = new URL(hook_url).openConnection(); + def post = new URL(hook_url).openConnection() post.setRequestMethod("POST") post.setDoOutput(true) post.setRequestProperty("Content-Type", "application/json") - post.getOutputStream().write(json_message.getBytes("UTF-8")); - def postRC = post.getResponseCode(); - if (! 
postRC.equals(200)) { - log.warn(post.getErrorStream().getText()); + post.getOutputStream().write(json_message.getBytes("UTF-8")) + def postRC = post.getResponseCode() + if (!postRC.equals(200)) { + log.warn(post.getErrorStream().getText()) } } diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test index 1dc317f..e43d208 100644 --- a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test @@ -41,58 +41,6 @@ nextflow_function { } } - test("Test Function workflowCitation") { - - function "workflowCitation" - - then { - assertAll( - { assert function.success }, - { assert snapshot(function.result).match() } - ) - } - } - - test("Test Function nfCoreLogo") { - - function "nfCoreLogo" - - when { - function { - """ - input[0] = false - """ - } - } - - then { - assertAll( - { assert function.success }, - { assert snapshot(function.result).match() } - ) - } - } - - test("Test Function dashedLine") { - - function "dashedLine" - - when { - function { - """ - input[0] = false - """ - } - } - - then { - assertAll( - { assert function.success }, - { assert snapshot(function.result).match() } - ) - } - } - test("Test Function without logColours") { function "logColours" diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap index 1037232..02c6701 100644 --- a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap @@ -17,26 +17,6 @@ }, "timestamp": "2024-02-28T12:02:59.729647" }, - "Test Function nfCoreLogo": { - "content": [ - "\n\n-\u001b[2m----------------------------------------------------\u001b[0m-\n \u001b[0;32m,--.\u001b[0;30m/\u001b[0;32m,-.\u001b[0m\n\u001b[0;34m ___ __ __ __ ___ \u001b[0;32m/,-._.--~'\u001b[0m\n\u001b[0;34m |\\ | |__ __ / ` / \\ |__) |__ \u001b[0;33m} {\u001b[0m\n\u001b[0;34m | \\| | \\__, \\__/ | \\ |___ \u001b[0;32m\\`-._,-`-,\u001b[0m\n \u001b[0;32m`._,._,'\u001b[0m\n\u001b[0;35m nextflow_workflow v9.9.9\u001b[0m\n-\u001b[2m----------------------------------------------------\u001b[0m-\n" - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-28T12:03:10.562934" - }, - "Test Function workflowCitation": { - "content": [ - "If you use nextflow_workflow for your analysis please cite:\n\n* The pipeline\n https://doi.org/10.5281/zenodo.5070524\n\n* The nf-core framework\n https://doi.org/10.1038/s41587-020-0439-x\n\n* Software dependencies\n https://github.com/nextflow_workflow/blob/master/CITATIONS.md" - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-28T12:03:07.019761" - }, "Test Function without logColours": { "content": [ { @@ -95,16 +75,6 @@ }, "timestamp": "2024-02-28T12:03:17.969323" }, - "Test Function dashedLine": { - "content": [ - "-\u001b[2m----------------------------------------------------\u001b[0m-" - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-28T12:03:14.366181" - }, "Test Function with logColours": { "content": [ { diff --git a/subworkflows/nf-core/utils_nfschema_plugin/main.nf b/subworkflows/nf-core/utils_nfschema_plugin/main.nf new file mode 100644 index 0000000..4994303 --- /dev/null +++ 
@@ -0,0 +1,46 @@
+//
+// Subworkflow that uses the nf-schema plugin to validate parameters and render the parameter summary
+//
+
+include { paramsSummaryLog   } from 'plugin/nf-schema'
+include { validateParameters } from 'plugin/nf-schema'
+
+workflow UTILS_NFSCHEMA_PLUGIN {
+
+    take:
+    input_workflow    // workflow: the workflow object used by nf-schema to get metadata from the workflow
+    validate_params   // boolean: validate the parameters
+    parameters_schema // string: path to the parameters JSON schema.
+                      // this has to be the same as the schema given to `validation.parametersSchema`
+                      // when this input is empty it will automatically use the configured schema or
+                      // "${projectDir}/nextflow_schema.json" as default. This input should not be empty
+                      // for meta pipelines
+
+    main:
+
+    //
+    // Print parameter summary to stdout. This will display the parameters
+    // that differ from the default given in the JSON schema
+    //
+    if(parameters_schema) {
+        log.info paramsSummaryLog(input_workflow, parameters_schema:parameters_schema)
+    } else {
+        log.info paramsSummaryLog(input_workflow)
+    }
+
+    //
+    // Validate the parameters using nextflow_schema.json or the schema
+    // given via the validation.parametersSchema configuration option
+    //
+    if(validate_params) {
+        if(parameters_schema) {
+            validateParameters(parameters_schema:parameters_schema)
+        } else {
+            validateParameters()
+        }
+    }
+
+    emit:
+    dummy_emit = true
+}
+
diff --git a/subworkflows/nf-core/utils_nfschema_plugin/meta.yml b/subworkflows/nf-core/utils_nfschema_plugin/meta.yml
new file mode 100644
index 0000000..f7d9f02
--- /dev/null
+++ b/subworkflows/nf-core/utils_nfschema_plugin/meta.yml
@@ -0,0 +1,35 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json
+name: "utils_nfschema_plugin"
+description: Run nf-schema to validate parameters and create a summary of changed parameters
+keywords:
+  - validation
+  - JSON schema
+  - plugin
+  - parameters
+  - summary
+components: []
+input:
+  - input_workflow:
+      type: object
+      description: |
+        The workflow object of the used pipeline.
+        This object contains meta data used to create the params summary log
+  - validate_params:
+      type: boolean
+      description: Validate the parameters and error if invalid.
+  - parameters_schema:
+      type: string
+      description: |
+        Path to the parameters JSON schema.
+        This has to be the same as the schema given to the `validation.parametersSchema` config
+        option. When this input is empty it will automatically use the configured schema or
+        "${projectDir}/nextflow_schema.json" as default. The schema should not be given in this way
+        for meta pipelines.
+output:
+  - dummy_emit:
+      type: boolean
+      description: Dummy emit to make nf-core subworkflows lint happy
+authors:
+  - "@nvnieuwk"
+maintainers:
+  - "@nvnieuwk"
diff --git a/subworkflows/nf-core/utils_nfschema_plugin/tests/main.nf.test b/subworkflows/nf-core/utils_nfschema_plugin/tests/main.nf.test
new file mode 100644
index 0000000..842dc43
--- /dev/null
+++ b/subworkflows/nf-core/utils_nfschema_plugin/tests/main.nf.test
@@ -0,0 +1,117 @@
+nextflow_workflow {
+
+    name "Test Subworkflow UTILS_NFSCHEMA_PLUGIN"
+    script "../main.nf"
+    workflow "UTILS_NFSCHEMA_PLUGIN"
+
+    tag "subworkflows"
+    tag "subworkflows_nfcore"
+    tag "subworkflows/utils_nfschema_plugin"
+    tag "plugin/nf-schema"
+
+    config "./nextflow.config"
+
+    test("Should run nothing") {
+
+        when {
+
+            params {
+                test_data = ''
+            }
+
+            workflow {
+                """
+                validate_params = false
+                input[0] = workflow
+                input[1] = validate_params
+                input[2] = ""
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert workflow.success }
+            )
+        }
+    }
+
+    test("Should validate params") {
+
+        when {
+
+            params {
+                test_data = ''
+                outdir    = 1
+            }
+
+            workflow {
+                """
+                validate_params = true
+                input[0] = workflow
+                input[1] = validate_params
+                input[2] = ""
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert workflow.failed },
+                { assert workflow.stdout.any { it.contains('ERROR ~ Validation of pipeline parameters failed!') } }
+            )
+        }
+    }
+
+    test("Should run nothing - custom schema") {
+
+        when {
+
+            params {
+                test_data = ''
+            }
+
+            workflow {
+                """
+                validate_params = false
+                input[0] = workflow
+                input[1] = validate_params
+                input[2] = "${projectDir}/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json"
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert workflow.success }
+            )
+        }
+    }
+
+    test("Should validate params - custom schema") {
+
+        when {
+
+            params {
+                test_data = ''
+                outdir    = 1
+            }
+
+            workflow {
+                """
+                validate_params = true
+                input[0] = workflow
+                input[1] = validate_params
+                input[2] = "${projectDir}/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json"
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert workflow.failed },
+                { assert workflow.stdout.any { it.contains('ERROR ~ Validation of pipeline parameters failed!') } }
+            )
+        }
+    }
+}
diff --git a/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config b/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config
new file mode 100644
index 0000000..0907ac5
--- /dev/null
+++ b/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config
@@ -0,0 +1,8 @@
+plugins {
+    id "nf-schema@2.1.0"
+}
+
+validation {
+    parametersSchema = "${projectDir}/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json"
+    monochromeLogs = true
+}
\ No newline at end of file
diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json b/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json
similarity index 95%
rename from subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json
rename to subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json
index 7626c1c..331e0d2 100644
--- a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json
+++ b/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json
@@ -1,10 +1,10 @@
 {
-    "$schema": "http://json-schema.org/draft-07/schema",
+    "$schema": "https://json-schema.org/draft/2020-12/schema",
     "$id": "https://raw.githubusercontent.com/./master/nextflow_schema.json",
     "title": ". pipeline parameters",
     "description": "",
     "type": "object",
-    "definitions": {
+    "$defs": {
         "input_output_options": {
             "title": "Input/output options",
             "type": "object",
@@ -87,10 +87,10 @@
     },
     "allOf": [
         {
-            "$ref": "#/definitions/input_output_options"
+            "$ref": "#/$defs/input_output_options"
         },
         {
-            "$ref": "#/definitions/generic_options"
+            "$ref": "#/$defs/generic_options"
         }
     ]
 }
diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf b/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf
deleted file mode 100644
index 2585b65..0000000
--- a/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf
+++ /dev/null
@@ -1,62 +0,0 @@
-//
-// Subworkflow that uses the nf-validation plugin to render help text and parameter summary
-//
-
-/*
-========================================================================================
-    IMPORT NF-VALIDATION PLUGIN
-========================================================================================
-*/
-
-include { paramsHelp         } from 'plugin/nf-validation'
-include { paramsSummaryLog   } from 'plugin/nf-validation'
-include { validateParameters } from 'plugin/nf-validation'
-
-/*
-========================================================================================
-    SUBWORKFLOW DEFINITION
-========================================================================================
-*/
-
-workflow UTILS_NFVALIDATION_PLUGIN {
-
-    take:
-    print_help       // boolean: print help
-    workflow_command // string: default commmand used to run pipeline
-    pre_help_text    // string: string to be printed before help text and summary log
-    post_help_text   // string: string to be printed after help text and summary log
-    validate_params  // boolean: validate parameters
-    schema_filename  // path: JSON schema file, null to use default value
-
-    main:
-
-    log.debug "Using schema file: ${schema_filename}"
-
-    // Default values for strings
-    pre_help_text    = pre_help_text    ?: ''
-    post_help_text   = post_help_text   ?: ''
-    workflow_command = workflow_command ?: ''
-
-    //
-    // Print help message if needed
-    //
-    if (print_help) {
-        log.info pre_help_text + paramsHelp(workflow_command, parameters_schema: schema_filename) + post_help_text
-        System.exit(0)
-    }
-
-    //
-    // Print parameter summary to stdout
-    //
-    log.info pre_help_text + paramsSummaryLog(workflow, parameters_schema: schema_filename) + post_help_text
-
-    //
-    // Validate parameters relative to the parameter JSON schema
-    //
-    if (validate_params){
-        validateParameters(parameters_schema: schema_filename)
-    }
-
-    emit:
-    dummy_emit = true
-}
diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml b/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml
deleted file mode 100644
index 3d4a6b0..0000000
--- a/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml
+++ /dev/null
@@ -1,44 +0,0 @@
-# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json
-name: "UTILS_NFVALIDATION_PLUGIN"
-description: Use nf-validation to initiate and validate a pipeline
-keywords:
-  - utility
-  - pipeline
-  - initialise
-  - validation
-components: []
-input:
-  - print_help:
-      type: boolean
-      description: |
-        Print help message and exit
-  - workflow_command:
-      type: string
-      description: |
-        The command to run the workflow e.g. "nextflow run main.nf"
-  - pre_help_text:
-      type: string
-      description: |
-        Text to print before the help message
-  - post_help_text:
-      type: string
-      description: |
-        Text to print after the help message
-  - validate_params:
-      type: boolean
-      description: |
-        Validate the parameters and error if invalid.
-  - schema_filename:
-      type: string
-      description: |
-        The filename of the schema to validate against.
-output:
-  - dummy_emit:
-      type: boolean
-      description: |
-        Dummy emit to make nf-core subworkflows lint happy
-authors:
-  - "@adamrtalbot"
-maintainers:
-  - "@adamrtalbot"
-  - "@maxulysse"
diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test
deleted file mode 100644
index 5784a33..0000000
--- a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test
+++ /dev/null
@@ -1,200 +0,0 @@
-nextflow_workflow {
-
-    name "Test Workflow UTILS_NFVALIDATION_PLUGIN"
-    script "../main.nf"
-    workflow "UTILS_NFVALIDATION_PLUGIN"
-    tag "subworkflows"
-    tag "subworkflows_nfcore"
-    tag "plugin/nf-validation"
-    tag "'plugin/nf-validation'"
-    tag "utils_nfvalidation_plugin"
-    tag "subworkflows/utils_nfvalidation_plugin"
-
-    test("Should run nothing") {
-
-        when {
-
-            params {
-                monochrome_logs = true
-                test_data       = ''
-            }
-
-            workflow {
-                """
-                help             = false
-                workflow_command = null
-                pre_help_text    = null
-                post_help_text   = null
-                validate_params  = false
-                schema_filename  = "$moduleTestDir/nextflow_schema.json"
-
-                input[0] = help
-                input[1] = workflow_command
-                input[2] = pre_help_text
-                input[3] = post_help_text
-                input[4] = validate_params
-                input[5] = schema_filename
-                """
-            }
-        }
-
-        then {
-            assertAll(
-                { assert workflow.success }
-            )
-        }
-    }
-
-    test("Should run help") {
-
-        when {
-
-            params {
-                monochrome_logs = true
-                test_data       = ''
-            }
-            workflow {
-                """
-                help             = true
-                workflow_command = null
-                pre_help_text    = null
-                post_help_text   = null
-                validate_params  = false
-                schema_filename  = "$moduleTestDir/nextflow_schema.json"
-
-                input[0] = help
-                input[1] = workflow_command
-                input[2] = pre_help_text
-                input[3] = post_help_text
-                input[4] = validate_params
-                input[5] = schema_filename
-                """
-            }
-        }
-
-        then {
-            assertAll(
-                { assert workflow.success },
-                { assert workflow.exitStatus == 0 },
-                { assert workflow.stdout.any { it.contains('Input/output options') } },
-                { assert workflow.stdout.any { it.contains('--outdir') } }
-            )
-        }
-    }
-
-    test("Should run help with command") {
-
-        when {
-
-            params {
-                monochrome_logs = true
-                test_data       = ''
-            }
-            workflow {
-                """
-                help             = true
-                workflow_command = "nextflow run noorg/doesntexist"
-                pre_help_text    = null
-                post_help_text   = null
-                validate_params  = false
-                schema_filename  = "$moduleTestDir/nextflow_schema.json"
-
-                input[0] = help
-                input[1] = workflow_command
-                input[2] = pre_help_text
-                input[3] = post_help_text
-                input[4] = validate_params
-                input[5] = schema_filename
-                """
-            }
-        }
-
-        then {
-            assertAll(
-                { assert workflow.success },
-                { assert workflow.exitStatus == 0 },
-                { assert workflow.stdout.any { it.contains('nextflow run noorg/doesntexist') } },
-                { assert workflow.stdout.any { it.contains('Input/output options') } },
-                { assert workflow.stdout.any { it.contains('--outdir') } }
-            )
-        }
-    }
-
-    test("Should run help with extra text") {
-
-        when {
-
-            params {
-                monochrome_logs = true
-                test_data       = ''
-            }
-            workflow {
-                """
-                help             = true
-                workflow_command = "nextflow run noorg/doesntexist"
-                pre_help_text    = "pre-help-text"
-                post_help_text   = "post-help-text"
-                validate_params  = false
-                schema_filename  = "$moduleTestDir/nextflow_schema.json"
-
-                input[0] = help
-                input[1] = workflow_command
-                input[2] = pre_help_text
-                input[3] = post_help_text
-                input[4] = validate_params
-                input[5] = schema_filename
-                """
-            }
-        }
-
-        then {
-            assertAll(
-                { assert workflow.success },
-                { assert workflow.exitStatus == 0 },
-                { assert workflow.stdout.any { it.contains('pre-help-text') } },
-                { assert workflow.stdout.any { it.contains('nextflow run noorg/doesntexist') } },
-                { assert workflow.stdout.any { it.contains('Input/output options') } },
-                { assert workflow.stdout.any { it.contains('--outdir') } },
-                { assert workflow.stdout.any { it.contains('post-help-text') } }
-            )
-        }
-    }
-
-    test("Should validate params") {
-
-        when {
-
-            params {
-                monochrome_logs = true
-                test_data       = ''
-                outdir          = 1
-            }
-            workflow {
-                """
-                help             = false
-                workflow_command = null
-                pre_help_text    = null
-                post_help_text   = null
-                validate_params  = true
-                schema_filename  = "$moduleTestDir/nextflow_schema.json"
-
-                input[0] = help
-                input[1] = workflow_command
-                input[2] = pre_help_text
-                input[3] = post_help_text
-                input[4] = validate_params
-                input[5] = schema_filename
-                """
-            }
-        }
-
-        then {
-            assertAll(
-                { assert workflow.failed },
-                { assert workflow.stdout.any { it.contains('ERROR ~ ERROR: Validation of pipeline parameters failed!') } }
-            )
-        }
-    }
-}
diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml
deleted file mode 100644
index 60b1cff..0000000
--- a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml
+++ /dev/null
@@ -1,2 +0,0 @@
-subworkflows/utils_nfvalidation_plugin:
-  - subworkflows/nf-core/utils_nfvalidation_plugin/**
diff --git a/workflows/neoantigenpipeline.nf b/workflows/neoantigenpipeline.nf
index 7ee2533..5b94155 100644
--- a/workflows/neoantigenpipeline.nf
+++ b/workflows/neoantigenpipeline.nf
@@ -3,8 +3,7 @@
     IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */
-
-include { paramsSummaryMap       } from 'plugin/nf-validation'
+include { paramsSummaryMap       } from 'plugin/nf-schema'
 include { paramsSummaryMultiqc   } from '../subworkflows/nf-core/utils_nfcore_pipeline'
 include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline'
 include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_neoantigenpipeline_pipeline'
@@ -17,6 +16,7 @@ include { NETMHCSTABANDPAN } from '../subworkflows/msk/netmhcstabandpan/main'
 include { NETMHCPAN } from '../modules/msk/netmhcpan/main'
 include { NEOANTIGENUTILS_NEOANTIGENINPUT } from '../modules/msk/neoantigenutils/neoantigeninput'
 include { NEOANTIGEN_EDITING } from '../subworkflows/msk/neoantigen_editing'
+include { NEOANTIGENUTILS_CONVERTANNOTJSON } from '../modules/msk/neoantigenutils/convertannotjson'

 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -51,12 +51,19 @@ workflow NEOANTIGENPIPELINE {
         }
         .set { phylowgs_input_ch }

+    ch_samplesheet.map { meta, maf, facets_hisens_cncf, hla_file ->
+        [meta, [], []]
+    }
+    .set { ch_sv_empty }
+
     // phylowgs workflow
     PHYLOWGS(phylowgs_input_ch)

     ch_versions = ch_versions.mix(PHYLOWGS.out.versions)

-    NETMHCSTABANDPAN(netMHCpan_input_ch,ch_cds_and_cdna)
+    NETMHCSTABANDPAN(netMHCpan_input_ch, ch_cds_and_cdna, ch_sv_empty)

     ch_versions = ch_versions.mix(NETMHCSTABANDPAN.out.versions)

@@ -73,7 +80,7 @@ workflow NEOANTIGENPIPELINE {
     merged_netMHC_input = merged
         .map{
-            new Tuple(it[0], it[1], it[2])
+            new Tuple(it[0], it[1], [], it[2])
         }
     merged_phylo_output = merged
         .map{
@@ -92,18 +99,28 @@ workflow NEOANTIGENPIPELINE {

     ch_versions = ch_versions.mix(NEOANTIGEN_EDITING.out.versions)

+    NEOANTIGENUTILS_CONVERTANNOTJSON(NEOANTIGEN_EDITING.out.annotated_output)
+
+    ch_versions = ch_versions.mix(NEOANTIGENUTILS_CONVERTANNOTJSON.out.versions)
+
     //
     // Collate and save software versions
     //
     softwareVersionsToYAML(ch_versions)
-        .collectFile(storeDir: "${params.outdir}/pipeline_info", name: 'nf_core_pipeline_software_mqc_versions.yml', sort: true, newLine: true)
-        .set { ch_collated_versions }
+        .collectFile(
+            storeDir: "${params.outdir}/pipeline_info",
+            name: '' + 'pipeline_software_' + 'mqc_' + 'versions.yml',
+            sort: true,
+            newLine: true
+        ).set { ch_collated_versions }
+
     emit:
     versions = ch_versions // channel: [ path(versions.yml) ]
     neo_out = NEOANTIGEN_EDITING.out.annotated_output
+    tsv_out = NEOANTIGENUTILS_CONVERTANNOTJSON.out.neoantigenTSV
 }

 def merge_for_input_generation(netMHCpan_input_ch, summ_ch, muts_ch, mutass_ch, netmhcpan_mut_tsv_ch, netmhcpan_wt_tsv_ch ) {
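
For context on the nf-validation to nf-schema migration in this patch: UTILS_NFSCHEMA_PLUGIN deliberately drops the help-text inputs of the removed UTILS_NFVALIDATION_PLUGIN (nf-schema renders help through the `validation` config scope instead), so callers now pass only the workflow metadata object, a validation flag, and an optional schema path. The following is a minimal sketch of how a pipeline-level initialisation workflow might invoke it; the PIPELINE_INITIALISATION wrapper and its include path are illustrative assumptions based on the standard nf-core template layout, not part of this diff:

    // hypothetical caller, not included in this patch
    include { UTILS_NFSCHEMA_PLUGIN } from '../../nf-core/utils_nfschema_plugin'

    workflow PIPELINE_INITIALISATION {
        take:
        validate_params // boolean: fail the run if parameters do not match the schema

        main:
        // Prints the params summary (only values differing from schema defaults);
        // because the third input is empty, validation falls back to the configured
        // schema or "${projectDir}/nextflow_schema.json"
        UTILS_NFSCHEMA_PLUGIN(
            workflow,
            validate_params,
            ""
        )

        emit:
        dummy_emit = UTILS_NFSCHEMA_PLUGIN.out.dummy_emit
    }

Passing a non-empty third input instead pins validation to an explicit schema file, which the new tests above exercise via their tests/nextflow_schema.json.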