diff --git a/.nf-core.yml b/.nf-core.yml index b6ad47c7..57742128 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -43,4 +43,4 @@ template: skip_features: - fastqc - is_nfcore - version: 1.9.3 + version: 1.10.0dev diff --git a/CHANGELOG.md b/CHANGELOG.md index 7a96d2ad..070f3e69 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## v1.10.0dev +### Changes + +1. Merged the following processes to improve efficiency of the pipeline: + - VCF index creation modules on output VCFs have been merged into the processes that created these VCFs + - The filter modules for `--filter` have been merged + - BED filtering and intersecting with Regions Of Interest have been merged + ## v1.9.3 Nifty Nieuwkerke - [January 23 2025] 1. Fix db postprocess in vcf2db module diff --git a/conf/modules.config b/conf/modules.config index 3460d37f..ab8c8964 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -49,15 +49,11 @@ process { ].join(" ") } - withName: "^.*CRAM_PREPARE_SAMTOOLS_BEDTOOLS:FILTER_BEDS\$" { + withName: "^.*CRAM_PREPARE_SAMTOOLS_BEDTOOLS:PROCESS_BEDS\$" { ext.prefix = { "${meta.id}.filter"} ext.args = "-vE \"LOW_COVERAGE|NO_COVERAGE${params.keep_alt_contigs ? 
"" : "|alt|random|decoy|Un"}\"" ext.args2 = "-d 150" - } - - withName: "^.*CRAM_PREPARE_SAMTOOLS_BEDTOOLS:BEDTOOLS_INTERSECT\$" { - ext.prefix = {"${meta.id}.intersect"} - ext.args = "-sorted" + ext.args3 = "-sorted" } /* @@ -87,7 +83,7 @@ process { withName: "^.*CRAM_CALL_GATK4:VCF_CONCAT_BCFTOOLS:BCFTOOLS_CONCAT\$" { ext.prefix = { "${meta.id}.${meta.caller}.g" } - ext.args = '--allow-overlaps --output-type z' + ext.args = '--allow-overlaps --output-type z --write-index=tbi' } withName: "^.*CRAM_CALL_GATK4:BCFTOOLS_STATS\$" { @@ -108,7 +104,7 @@ process { withName: "^.*BAM_CALL_ELPREP:VCF_CONCAT_BCFTOOLS:BCFTOOLS_CONCAT\$" { ext.prefix = { "${meta.id}.${meta.caller}.g" } - ext.args = '--allow-overlaps --output-type z' + ext.args = '--allow-overlaps --output-type z --write-index=tbi' } withName: "^.*BAM_CALL_ELPREP:BCFTOOLS_STATS\$" { @@ -168,7 +164,7 @@ process { withName: "^.*GVCF_JOINT_GENOTYPE_GATK4:VCF_CONCAT_BCFTOOLS:BCFTOOLS_CONCAT\$" { ext.prefix = { "${meta.id}.concat" } - ext.args = "--allow-overlaps --output-type z" + ext.args = "--allow-overlaps --output-type z --write-index=tbi" } /* @@ -194,45 +190,29 @@ process { } withName: "^.*BAM_CALL_VARDICTJAVA:VCF_CONCAT_BCFTOOLS:BCFTOOLS_CONCAT\$" { - ext.args = '--allow-overlaps --output-type z' + ext.args = '--allow-overlaps --output-type z --write-index=tbi' ext.prefix = {"${meta.id}.concat"} } - withName: "^.*BAM_CALL_VARDICTJAVA:TABIX_VCFANNO\$" { - ext.prefix = {"${meta.id}.vcfanno"} - } - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FILTER ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ - withName: "^.*VCF_FILTER_BCFTOOLS:FILTER_1\$" { - ext.prefix = { "${meta.id}.filtered1" } - ext.args = { - meta.caller == "vardict" ? - "-i 'QUAL >= 0${params.only_pass ? " && FILTER=\"PASS\"" : ""}' --output-type z": - meta.caller == "haplotypecaller" ? 
- "--output-type z --soft-filter 'GATKCutoffSNP' -e 'TYPE=\"snp\" && (MQRankSum < -12.5 || ReadPosRankSum < -8.0 || QD < 2.0 || FS > 60.0 || MQ < 30.0)' -m '+'": - meta.caller == "elprep" ? - "--output-type z --soft-filter 'GATKCutoffSNP' -e 'TYPE=\"snp\" && (MQRankSum < -12.5 || ReadPosRankSum < -8.0 || QD < 2.0 || FS > 60.0 || MQ < 30.0)' -m '+'": - "" - } - } - - withName: "^.*VCF_FILTER_BCFTOOLS:FILTER_2\$" { - ext.args = { - meta.caller == "vardict" ? - "--soft-filter 'LowFreqBias' --mode '+' -e 'FORMAT/AF[0:*] < 0.02 && FORMAT/VD[0] < 30 && INFO/SBF < 0.1 && INFO/NM >= 2.0' --output-type z" : - meta.caller == "haplotypecaller" ? - '--output-type z --soft-filter \'GATKCutoffIndel\' -e \'TYPE="indel" && (ReadPosRankSum < -20.0 || QD < 2.0 || FS > 200.0 || SOR > 10.0 )\' -m \'+\'' : - meta.caller == "elprep" ? - '--output-type z --soft-filter \'GATKCutoffIndel\' -e \'TYPE="indel" && (ReadPosRankSum < -20.0 || QD < 2.0 || FS > 200.0 || SOR > 10.0 )\' -m \'+\'' : - "" - - } - ext.prefix = {"${meta.id}.filtered"} + withName: "^.*VCF_FILTER_BCFTOOLS:BCFTOOLS_FILTER\$" { + ext.prefix = { "${meta.id}.filtered" } + ext.args = { [ + meta.caller == "vardict" ? "-i 'QUAL >= 0${params.only_pass ? " && FILTER=\"PASS\"" : ""}'" : "", + meta.caller == "haplotypecaller" ? "--soft-filter 'GATKCutoffSNP' -e 'TYPE=\"snp\" && (MQRankSum < -12.5 || ReadPosRankSum < -8.0 || QD < 2.0 || FS > 60.0 || MQ < 30.0)' -m '+'" : "", + meta.caller == "elprep" ? "--soft-filter 'GATKCutoffSNP' -e 'TYPE=\"snp\" && (MQRankSum < -12.5 || ReadPosRankSum < -8.0 || QD < 2.0 || FS > 60.0 || MQ < 30.0)' -m '+'" : "" + ].findAll { arg -> arg != "" }.join(" ") } + ext.args2 = { [ + meta.caller == "vardict" ? "--soft-filter 'LowFreqBias' --mode '+' -e 'FORMAT/AF[0:*] < 0.02 && FORMAT/VD[0] < 30 && INFO/SBF < 0.1 && INFO/NM >= 2.0'" : "", + meta.caller == "haplotypecaller" ? 
'--soft-filter \'GATKCutoffIndel\' -e \'TYPE="indel" && (ReadPosRankSum < -20.0 || QD < 2.0 || FS > 200.0 || SOR > 10.0 )\' -m \'+\'' : "", + meta.caller == "elprep" ? '--soft-filter \'GATKCutoffIndel\' -e \'TYPE="indel" && (ReadPosRankSum < -20.0 || QD < 2.0 || FS > 200.0 || SOR > 10.0 )\' -m \'+\'' : "", + "--output-type z --write-index=tbi" + ].findAll { arg -> arg != "" }.join(" ") } } /* @@ -243,7 +223,7 @@ process { withName: "^.*GERMLINE:BCFTOOLS_NORM\$" { ext.prefix = {"${meta.id}.normalized"} - ext.args = "-m-" + ext.args = "-m- --output-type z --write-index=tbi" } /* @@ -264,7 +244,7 @@ process { withName: "^.*VCF_PED_RTGTOOLS:BCFTOOLS_ANNOTATE\$" { ext.prefix = { "${meta.id}.${meta.caller}.ped.annotated" } - ext.args = "--output-type z" + ext.args = "--output-type z --write-index=tbi" } /* @@ -301,20 +281,16 @@ process { ].join(' ').trim()} } - withName: "^.*VCF_ANNOTATION:VCF_ANNOTATE_ENSEMBLVEP:BCFTOOLS_CONCAT\$" { + withName: "^.*VCF_ANNOTATE_ENSEMBLVEP:BCFTOOLS_CONCAT\$" { ext.prefix = {"${meta.id}_concat"} ext.args = "--allow-overlaps --output-type z" } - withName: "^.*VCF_ANNOTATION:VCF_ANNOTATE_ENSEMBLVEP:BCFTOOLS_SORT\$" { + withName: "^.*VCF_ANNOTATE_ENSEMBLVEP:BCFTOOLS_SORT\$" { ext.prefix = {"${meta.id}.sorted"} + ext.args = "--write-index=tbi --output-type z" } - withName: "^.*VCF_ANNOTATION:BGZIP_ANNOTATED_VCFS\$" { - ext.prefix = {"${meta.id}.vcfanno"} - } - - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ VALIDATION diff --git a/modules.json b/modules.json index 36b21493..265f3c3a 100644 --- a/modules.json +++ b/modules.json @@ -51,11 +51,6 @@ "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, - "bedtools/intersect": { - "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] - }, "bedtools/merge": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", @@ -85,7 +80,7 @@ }, 
"ensemblvep/vep": { "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "git_sha": "ef36baef619ebe8a244fee313d44eba571ba73b4", "installed_by": ["modules"], "patch": "modules/nf-core/ensemblvep/vep/ensemblvep-vep.diff" }, @@ -191,16 +186,6 @@ "installed_by": ["modules"], "patch": "modules/nf-core/somalier/relate/somalier-relate.diff" }, - "tabix/bgzip": { - "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] - }, - "tabix/bgziptabix": { - "branch": "master", - "git_sha": "f448e846bdadd80fc8be31fbbc78d9f5b5131a45", - "installed_by": ["modules"] - }, "tabix/tabix": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", @@ -225,7 +210,7 @@ }, "vcfanno": { "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "git_sha": "b1137e22798227331c9a9a12bd92bd6e865865c5", "installed_by": ["modules"] } } diff --git a/modules/local/bcftools/filter/main.nf b/modules/local/bcftools/filter/main.nf new file mode 100644 index 00000000..2ef93b4c --- /dev/null +++ b/modules/local/bcftools/filter/main.nf @@ -0,0 +1,87 @@ +process BCFTOOLS_FILTER { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/bcftools:1.20--h8b25389_0': + 'biocontainers/bcftools:1.20--h8b25389_0' }" + + input: + tuple val(meta), path(vcf), path(tbi) + + output: + tuple val(meta), path("*.${extension}"), emit: vcf + tuple val(meta), path("*.tbi") , emit: tbi, optional: true + tuple val(meta), path("*.csi") , emit: csi, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def args3 = task.ext.args3 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + def last_args = args3 ?: args2 ?: args + + extension = last_args.contains("--output-type b") || last_args.contains("-Ob") ? "bcf.gz" : + last_args.contains("--output-type u") || last_args.contains("-Ou") ? "bcf" : + last_args.contains("--output-type z") || last_args.contains("-Oz") ? "vcf.gz" : + last_args.contains("--output-type v") || last_args.contains("-Ov") ? "vcf" : + "vcf" + + if ("$vcf" == "${prefix}.${extension}") error "Input and output names are the same, set prefix in module configuration to disambiguate!" + + def filter_2 = args2 ? "| bcftools filter --threads ${task.cpus} ${args2}" : "" + def filter_3 = args3 ? "| bcftools filter --threads ${task.cpus} ${args3}" : "" + + """ + bcftools filter \\ + --threads ${task.cpus} \\ + $args \\ + $vcf \\ + ${filter_2} \\ + ${filter_3} \\ + --output ${prefix}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def args3 = task.ext.args3 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + def last_args = args3 ?: args2 ?: args + + extension = last_args.contains("--output-type b") || last_args.contains("-Ob") ? "bcf.gz" : + last_args.contains("--output-type u") || last_args.contains("-Ou") ? 
"bcf" : + last_args.contains("--output-type z") || last_args.contains("-Oz") ? "vcf.gz" : + last_args.contains("--output-type v") || last_args.contains("-Ov") ? "vcf" : + "vcf" + def index = last_args.contains("--write-index=tbi") || last_args.contains("-W=tbi") ? "tbi" : + last_args.contains("--write-index=csi") || last_args.contains("-W=csi") ? "csi" : + last_args.contains("--write-index") || last_args.contains("-W") ? "csi" : + "" + def create_cmd = extension.endsWith(".gz") ? "echo '' | gzip >" : "touch" + def create_index = extension.endsWith(".gz") && index.matches("csi|tbi") ? "touch ${prefix}.${extension}.${index}" : "" + + if ("$vcf" == "${prefix}.${extension}") error "Input and output names are the same, set prefix in module configuration to disambiguate!" + + """ + ${create_cmd} ${prefix}.${extension} + ${create_index} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bedtools/intersect/tests/main.nf.test b/modules/local/bcftools/filter/tests/main.nf.test similarity index 52% rename from modules/nf-core/bedtools/intersect/tests/main.nf.test rename to modules/local/bcftools/filter/tests/main.nf.test index cd770946..643475e1 100644 --- a/modules/nf-core/bedtools/intersect/tests/main.nf.test +++ b/modules/local/bcftools/filter/tests/main.nf.test @@ -1,27 +1,26 @@ nextflow_process { - name "Test Process BEDTOOLS_INTERSECT" + name "Test Process BCFTOOLS_FILTER" script "../main.nf" - process "BEDTOOLS_INTERSECT" - config "./nextflow.config" + process "BCFTOOLS_FILTER" tag "modules" - tag "modules_nfcore" - tag "bedtools" - tag "bedtools/intersect" + tag "modules_local" + tag "bcftools" + tag "bcftools/filter" - test("sarscov2 - bed - bed") { + test("sarscov2 - 1 filter") { + + config "./one_filter.config" when { process { """ input[0] = [ - [ id:'test' ], - file(params.modules_testdata_base_path + 
'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test2.bed', checkIfExists: true) + [id:"vcf_test"], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true), + [] ] - - input[1] = [[:], []] """ } } @@ -35,18 +34,18 @@ nextflow_process { } - test("sarscov2 - bam - bam") { + test("sarscov2 - 2 filters") { + + config "./two_filters.config" when { process { """ input[0] = [ - [ id:'test' ], - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/baits.bed', checkIfExists: true) + [id:"vcf_test"], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true), + [] ] - - input[1] = [[:], []] """ } } @@ -60,20 +59,18 @@ nextflow_process { } - test("sarscov2 - bed - stub") { + test("sarscov2 - 3 filters") { - options "-stub" + config "./three_filters.config" when { process { """ input[0] = [ - [ id:'test' ], - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test2.bed', checkIfExists: true) + [id:"vcf_test"], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true), + [] ] - - input[1] = [[:], []] """ } } diff --git a/modules/local/bcftools/filter/tests/main.nf.test.snap b/modules/local/bcftools/filter/tests/main.nf.test.snap new file mode 100644 index 00000000..f245dd7d --- /dev/null +++ b/modules/local/bcftools/filter/tests/main.nf.test.snap @@ -0,0 +1,167 @@ +{ + "sarscov2 - 2 filters": { + "content": [ + { + "0": [ + [ + { + "id": "vcf_test" + }, + "vcf_test.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "1": [ + [ + { + "id": "vcf_test" + }, + 
"vcf_test.vcf.gz.tbi:md5,1a1edb22d2a33f9673449827e1cd38e8" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,9a336d1ee26b527d7a2bdbeead155f64" + ], + "csi": [ + + ], + "tbi": [ + [ + { + "id": "vcf_test" + }, + "vcf_test.vcf.gz.tbi:md5,1a1edb22d2a33f9673449827e1cd38e8" + ] + ], + "vcf": [ + [ + { + "id": "vcf_test" + }, + "vcf_test.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "versions": [ + "versions.yml:md5,9a336d1ee26b527d7a2bdbeead155f64" + ] + } + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.3" + }, + "timestamp": "2025-01-24T14:15:19.958449133" + }, + "sarscov2 - 3 filters": { + "content": [ + { + "0": [ + [ + { + "id": "vcf_test" + }, + "vcf_test.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "1": [ + [ + { + "id": "vcf_test" + }, + "vcf_test.vcf.gz.tbi:md5,1a1edb22d2a33f9673449827e1cd38e8" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,9a336d1ee26b527d7a2bdbeead155f64" + ], + "csi": [ + + ], + "tbi": [ + [ + { + "id": "vcf_test" + }, + "vcf_test.vcf.gz.tbi:md5,1a1edb22d2a33f9673449827e1cd38e8" + ] + ], + "vcf": [ + [ + { + "id": "vcf_test" + }, + "vcf_test.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "versions": [ + "versions.yml:md5,9a336d1ee26b527d7a2bdbeead155f64" + ] + } + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.3" + }, + "timestamp": "2025-01-24T14:14:01.149643861" + }, + "sarscov2 - 1 filter": { + "content": [ + { + "0": [ + [ + { + "id": "vcf_test" + }, + "vcf_test.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "1": [ + [ + { + "id": "vcf_test" + }, + "vcf_test.vcf.gz.tbi:md5,1a1edb22d2a33f9673449827e1cd38e8" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,9a336d1ee26b527d7a2bdbeead155f64" + ], + "csi": [ + + ], + "tbi": [ + [ + { + "id": "vcf_test" + }, + "vcf_test.vcf.gz.tbi:md5,1a1edb22d2a33f9673449827e1cd38e8" + ] + ], + "vcf": [ + [ + { + "id": "vcf_test" + }, + "vcf_test.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "versions": [ + 
"versions.yml:md5,9a336d1ee26b527d7a2bdbeead155f64" + ] + } + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.3" + }, + "timestamp": "2025-01-24T14:15:05.44617156" + } +} \ No newline at end of file diff --git a/modules/local/bcftools/filter/tests/one_filter.config b/modules/local/bcftools/filter/tests/one_filter.config new file mode 100644 index 00000000..77b2bd01 --- /dev/null +++ b/modules/local/bcftools/filter/tests/one_filter.config @@ -0,0 +1,3 @@ +process { + ext.args = {"--no-version --output-type z --write-index=tbi"} +} diff --git a/modules/local/bcftools/filter/tests/three_filters.config b/modules/local/bcftools/filter/tests/three_filters.config new file mode 100644 index 00000000..38a1e610 --- /dev/null +++ b/modules/local/bcftools/filter/tests/three_filters.config @@ -0,0 +1,5 @@ +process { + ext.args = {"--no-version"} + ext.args2 = {"--no-version"} + ext.args3 = {"--no-version --output-type z --write-index=tbi"} +} diff --git a/modules/local/bcftools/filter/tests/two_filters.config b/modules/local/bcftools/filter/tests/two_filters.config new file mode 100644 index 00000000..718fbbd5 --- /dev/null +++ b/modules/local/bcftools/filter/tests/two_filters.config @@ -0,0 +1,4 @@ +process { + ext.args = {"--no-version"} + ext.args2 = {"--no-version --output-type z --write-index=tbi"} +} diff --git a/modules/local/filter_beds/main.nf b/modules/local/process_beds/main.nf similarity index 75% rename from modules/local/filter_beds/main.nf rename to modules/local/process_beds/main.nf index d7954da0..5997a8ea 100644 --- a/modules/local/filter_beds/main.nf +++ b/modules/local/process_beds/main.nf @@ -1,4 +1,4 @@ -process FILTER_BEDS { +process PROCESS_BEDS { tag "$meta.id" label 'process_single' @@ -8,21 +8,27 @@ process FILTER_BEDS { 'biocontainers/bedtools:2.31.0--hf5e1c6e_2' }" input: - tuple val(meta), path(bed) + tuple val(meta), path(bed), path(roi) output: tuple val(meta), path('*.bed'), emit: bed path "versions.yml" , emit: versions script: - // 
Remove regions with no coverage from the callable regions BED file + // Remove regions with no coverage from the callable regions BED file and intersect with an optional ROI file def args = task.ext.args ?: '' def args2 = task.ext.args2 ?: '' + def args3 = task.ext.args3 ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def unzip = bed.extension == "gz" ? "zcat" : "cat" + def intersect = roi ? "| bedtools intersect -a ${roi} -b - ${args3}" : "" """ - ${unzip} ${bed} | grep ${args} | bedtools merge ${args2} > ${prefix}.bed + ${unzip} ${bed} \\ + | grep ${args} \\ + | bedtools merge ${args2} \\ + ${intersect} \\ + > ${prefix}.bed cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/filter_beds/tests/main.nf.test b/modules/local/process_beds/tests/main.nf.test similarity index 76% rename from modules/local/filter_beds/tests/main.nf.test rename to modules/local/process_beds/tests/main.nf.test index f139e316..8a4b1047 100644 --- a/modules/local/filter_beds/tests/main.nf.test +++ b/modules/local/process_beds/tests/main.nf.test @@ -1,12 +1,12 @@ nextflow_process { - name "Test Process FILTER_BEDS" + name "Test Process PROCESS_BEDS" script "../main.nf" - process "FILTER_BEDS" + process "PROCESS_BEDS" tag "modules" tag "modules_local" - tag "filter_beds" + tag "PROCESS_BEDS" test("homo_sapiens - bed") { @@ -17,7 +17,8 @@ nextflow_process { """ input[0] = [ [ id:'test', single_end:false ], // meta map - file(params.bed, checkIfExists:true) + file(params.bed, checkIfExists:true), + file(params.split1, checkIfExists:true) ] """ } @@ -42,7 +43,8 @@ nextflow_process { """ input[0] = [ [ id:'test', single_end:false ], // meta map - file(params.bed, checkIfExists:true) + file(params.bed, checkIfExists:true), + file(params.split1, checkIfExists:true) ] """ } diff --git a/modules/local/filter_beds/tests/main.nf.test.snap b/modules/local/process_beds/tests/main.nf.test.snap similarity index 72% rename from modules/local/filter_beds/tests/main.nf.test.snap 
rename to modules/local/process_beds/tests/main.nf.test.snap index c4e2ba18..96536e2a 100644 --- a/modules/local/filter_beds/tests/main.nf.test.snap +++ b/modules/local/process_beds/tests/main.nf.test.snap @@ -8,11 +8,11 @@ "id": "test", "single_end": false }, - "test.bed:md5,fa245abf8add7a80650566a1de67ec04" + "test.bed:md5,ffd6f70b7fa85ffb46fb66a4cf9afb70" ] ], "1": [ - "versions.yml:md5,38ea76fdc8d681b1e47415b195cccd88" + "versions.yml:md5,c6be2bd45c01066eacc08f419bce67d6" ], "bed": [ [ @@ -20,19 +20,19 @@ "id": "test", "single_end": false }, - "test.bed:md5,fa245abf8add7a80650566a1de67ec04" + "test.bed:md5,ffd6f70b7fa85ffb46fb66a4cf9afb70" ] ], "versions": [ - "versions.yml:md5,38ea76fdc8d681b1e47415b195cccd88" + "versions.yml:md5,c6be2bd45c01066eacc08f419bce67d6" ] } ], "meta": { "nf-test": "0.9.1", - "nextflow": "24.10.0" + "nextflow": "24.10.3" }, - "timestamp": "2024-11-13T17:12:06.470648263" + "timestamp": "2025-01-23T16:45:26.442970884" }, "homo_sapiens - bed - stub": { "content": [ @@ -47,7 +47,7 @@ ] ], "1": [ - "versions.yml:md5,38ea76fdc8d681b1e47415b195cccd88" + "versions.yml:md5,c6be2bd45c01066eacc08f419bce67d6" ], "bed": [ [ @@ -59,14 +59,14 @@ ] ], "versions": [ - "versions.yml:md5,38ea76fdc8d681b1e47415b195cccd88" + "versions.yml:md5,c6be2bd45c01066eacc08f419bce67d6" ] } ], "meta": { "nf-test": "0.9.1", - "nextflow": "24.10.0" + "nextflow": "24.10.3" }, - "timestamp": "2024-11-13T17:09:43.531020092" + "timestamp": "2025-01-23T16:45:35.803097114" } } \ No newline at end of file diff --git a/modules/local/filter_beds/tests/nextflow.config b/modules/local/process_beds/tests/nextflow.config similarity index 100% rename from modules/local/filter_beds/tests/nextflow.config rename to modules/local/process_beds/tests/nextflow.config diff --git a/modules/nf-core/bedtools/intersect/environment.yml b/modules/nf-core/bedtools/intersect/environment.yml deleted file mode 100644 index 5683bc05..00000000 --- a/modules/nf-core/bedtools/intersect/environment.yml 
+++ /dev/null @@ -1,5 +0,0 @@ -channels: - - conda-forge - - bioconda -dependencies: - - bioconda::bedtools=2.31.1 diff --git a/modules/nf-core/bedtools/intersect/main.nf b/modules/nf-core/bedtools/intersect/main.nf deleted file mode 100644 index d9e79e7f..00000000 --- a/modules/nf-core/bedtools/intersect/main.nf +++ /dev/null @@ -1,59 +0,0 @@ -process BEDTOOLS_INTERSECT { - tag "$meta.id" - label 'process_single' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bedtools:2.31.1--hf5e1c6e_0' : - 'biocontainers/bedtools:2.31.1--hf5e1c6e_0' }" - - input: - tuple val(meta), path(intervals1), path(intervals2) - tuple val(meta2), path(chrom_sizes) - - output: - tuple val(meta), path("*.${extension}"), emit: intersect - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - //Extension of the output file. It is set by the user via "ext.suffix" in the config. Corresponds to the file format which depends on arguments (e. g., ".bed", ".bam", ".txt", etc.). - extension = task.ext.suffix ?: "${intervals1.extension}" - def sizes = chrom_sizes ? "-g ${chrom_sizes}" : '' - if ("$intervals1" == "${prefix}.${extension}" || - "$intervals2" == "${prefix}.${extension}") - error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" 
- """ - bedtools \\ - intersect \\ - -a $intervals1 \\ - -b $intervals2 \\ - $args \\ - $sizes \\ - > ${prefix}.${extension} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - extension = task.ext.suffix ?: "bed" - if ("$intervals1" == "${prefix}.${extension}" || - "$intervals2" == "${prefix}.${extension}") - error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" - """ - touch ${prefix}.${extension} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") - END_VERSIONS - """ -} diff --git a/modules/nf-core/bedtools/intersect/meta.yml b/modules/nf-core/bedtools/intersect/meta.yml deleted file mode 100644 index 45ecf377..00000000 --- a/modules/nf-core/bedtools/intersect/meta.yml +++ /dev/null @@ -1,63 +0,0 @@ -name: bedtools_intersect -description: Allows one to screen for overlaps between two sets of genomic features. -keywords: - - bed - - intersect - - overlap -tools: - - bedtools: - description: | - A set of tools for genomic analysis tasks, specifically enabling genome arithmetic (merge, count, complement) on various file types. - documentation: https://bedtools.readthedocs.io/en/latest/content/tools/intersect.html - licence: ["MIT"] - identifier: biotools:bedtools -input: - - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - intervals1: - type: file - description: BAM/BED/GFF/VCF - pattern: "*.{bam|bed|gff|vcf}" - - intervals2: - type: file - description: BAM/BED/GFF/VCF - pattern: "*.{bam|bed|gff|vcf}" - - - meta2: - type: map - description: | - Groovy Map containing reference chromosome sizes - e.g. 
[ id:'test' ] - - chrom_sizes: - type: file - description: Chromosome sizes file - pattern: "*{.sizes,.txt}" -output: - - intersect: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - "*.${extension}": - type: file - description: File containing the description of overlaps found between the two - features - pattern: "*.${extension}" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@edmundmiller" - - "@sruthipsuresh" - - "@drpatelh" - - "@sidorov-si" -maintainers: - - "@edmundmiller" - - "@sruthipsuresh" - - "@drpatelh" - - "@sidorov-si" diff --git a/modules/nf-core/bedtools/intersect/tests/main.nf.test.snap b/modules/nf-core/bedtools/intersect/tests/main.nf.test.snap deleted file mode 100644 index b748dd49..00000000 --- a/modules/nf-core/bedtools/intersect/tests/main.nf.test.snap +++ /dev/null @@ -1,101 +0,0 @@ -{ - "sarscov2 - bam - bam": { - "content": [ - { - "0": [ - [ - { - "id": "test" - }, - "test_out.bam:md5,738324efe2b1e442ceb6539a630c3fe6" - ] - ], - "1": [ - "versions.yml:md5,42ba439339672f4a9193f0f0fe7a7f64" - ], - "intersect": [ - [ - { - "id": "test" - }, - "test_out.bam:md5,738324efe2b1e442ceb6539a630c3fe6" - ] - ], - "versions": [ - "versions.yml:md5,42ba439339672f4a9193f0f0fe7a7f64" - ] - } - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.4" - }, - "timestamp": "2024-09-17T20:55:57.454847668" - }, - "sarscov2 - bed - bed": { - "content": [ - { - "0": [ - [ - { - "id": "test" - }, - "test_out.bed:md5,afcbf01c2f2013aad71dbe8e34f2c15c" - ] - ], - "1": [ - "versions.yml:md5,42ba439339672f4a9193f0f0fe7a7f64" - ], - "intersect": [ - [ - { - "id": "test" - }, - "test_out.bed:md5,afcbf01c2f2013aad71dbe8e34f2c15c" - ] - ], - "versions": [ - "versions.yml:md5,42ba439339672f4a9193f0f0fe7a7f64" - ] - } - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.4" - }, - "timestamp": 
"2024-09-17T20:55:49.072132931" - }, - "sarscov2 - bed - stub": { - "content": [ - { - "0": [ - [ - { - "id": "test" - }, - "test_out.bed:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - "versions.yml:md5,42ba439339672f4a9193f0f0fe7a7f64" - ], - "intersect": [ - [ - { - "id": "test" - }, - "test_out.bed:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions": [ - "versions.yml:md5,42ba439339672f4a9193f0f0fe7a7f64" - ] - } - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.4" - }, - "timestamp": "2024-09-17T20:56:06.259192552" - } -} \ No newline at end of file diff --git a/modules/nf-core/bedtools/intersect/tests/nextflow.config b/modules/nf-core/bedtools/intersect/tests/nextflow.config deleted file mode 100644 index f1f9e693..00000000 --- a/modules/nf-core/bedtools/intersect/tests/nextflow.config +++ /dev/null @@ -1,5 +0,0 @@ -process { - withName: BEDTOOLS_INTERSECT { - ext.prefix = { "${meta.id}_out" } - } -} diff --git a/modules/nf-core/bedtools/intersect/tests/tags.yml b/modules/nf-core/bedtools/intersect/tests/tags.yml deleted file mode 100644 index 6219cc40..00000000 --- a/modules/nf-core/bedtools/intersect/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -bedtools/intersect: - - "modules/nf-core/bedtools/intersect/**" diff --git a/modules/nf-core/ensemblvep/vep/ensemblvep-vep.diff b/modules/nf-core/ensemblvep/vep/ensemblvep-vep.diff index bcc6ba95..eab0611f 100644 --- a/modules/nf-core/ensemblvep/vep/ensemblvep-vep.diff +++ b/modules/nf-core/ensemblvep/vep/ensemblvep-vep.diff @@ -1,4 +1,4 @@ -Changes in module 'nf-core/ensemblvep/vep' +Changes in component 'nf-core/ensemblvep/vep' Changes in 'ensemblvep/vep/main.nf': --- modules/nf-core/ensemblvep/vep/main.nf +++ modules/nf-core/ensemblvep/vep/main.nf @@ -6,8 +6,8 @@ Changes in 'ensemblvep/vep/main.nf': conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
-- 'https://depot.galaxyproject.org/singularity/ensembl-vep:112.0--pl5321h2a3209d_0' : -- 'biocontainers/ensembl-vep:112.0--pl5321h2a3209d_0' }" +- 'https://depot.galaxyproject.org/singularity/ensembl-vep:113.0--pl5321h2a3209d_0' : +- 'biocontainers/ensembl-vep:113.0--pl5321h2a3209d_0' }" + 'https://depot.galaxyproject.org/singularity/ensembl-vep:105.0--pl5321h4a94de4_1' : + 'biocontainers/ensembl-vep:105.0--pl5321h4a94de4_1' }" @@ -22,11 +22,11 @@ Changes in 'ensemblvep/vep/main.nf': Changes in 'ensemblvep/vep/tests/main.nf.test': --- modules/nf-core/ensemblvep/vep/tests/main.nf.test +++ modules/nf-core/ensemblvep/vep/tests/main.nf.test -@@ -107,7 +107,7 @@ +@@ -110,7 +110,7 @@ assertAll( { assert process.success }, { assert snapshot(process.out.versions).match() }, -- { assert path(process.out.tab.get(0).get(1)).linesGzip.contains("## ENSEMBL VARIANT EFFECT PREDICTOR v112.0") } +- { assert path(process.out.tab.get(0).get(1)).linesGzip.contains("## ENSEMBL VARIANT EFFECT PREDICTOR v113.0") } + { assert path(process.out.tab.get(0).get(1)).linesGzip.contains("## ENSEMBL VARIANT EFFECT PREDICTOR v105.0") } ) } @@ -35,38 +35,35 @@ Changes in 'ensemblvep/vep/tests/main.nf.test': Changes in 'ensemblvep/vep/tests/main.nf.test.snap': --- modules/nf-core/ensemblvep/vep/tests/main.nf.test.snap +++ modules/nf-core/ensemblvep/vep/tests/main.nf.test.snap -@@ -2,25 +2,25 @@ +@@ -2,19 +2,19 @@ "test_ensemblvep_vep_fasta_tab_gz": { "content": [ [ -- "versions.yml:md5,d06f1eb60f534489026d682eb3aa5559" -+ "versions.yml:md5,c6d58a35e7be5e6ab46a3f9757f6e259" +- "versions.yml:md5,534306f30b29b830c409da4b0a26bd20" ++ "versions.yml:md5,4faee762040f2f0241cdb8b48a2ac759" ] ], "meta": { -- "nf-test": "0.8.4", -- "nextflow": "24.04.4" -+ "nf-test": "0.9.1", -+ "nextflow": "24.10.0" + "nf-test": "0.9.1", + "nextflow": "24.10.3" }, -- "timestamp": "2024-09-02T10:15:18.228927" -+ "timestamp": "2024-11-20T14:10:59.846254319" +- "timestamp": "2025-01-24T10:03:33.681292738" ++ "timestamp": 
"2025-01-24T11:32:50.653988125" }, - "test_ensemblvep_vep_fasta_vcf": { + "test_ensemblvep_vep_fasta_vcf - stub (not really but linting complains otherwise)": { "content": [ [ -- "versions.yml:md5,d06f1eb60f534489026d682eb3aa5559" -+ "versions.yml:md5,c6d58a35e7be5e6ab46a3f9757f6e259" - ] - ], - "meta": { -- "nf-test": "0.8.4", -- "nextflow": "24.04.4" -+ "nf-test": "0.9.1", -+ "nextflow": "24.10.0" +- "versions.yml:md5,534306f30b29b830c409da4b0a26bd20" ++ "versions.yml:md5,4faee762040f2f0241cdb8b48a2ac759" + ], + "d41d8cd98f00b204e9800998ecf8427e", + "test.vcf.gz.tbi" +@@ -23,6 +23,6 @@ + "nf-test": "0.9.1", + "nextflow": "24.10.3" }, -- "timestamp": "2024-09-02T10:14:50.193861" -+ "timestamp": "2024-11-20T14:10:44.092773407" +- "timestamp": "2025-01-24T10:15:45.231110684" ++ "timestamp": "2025-01-24T11:32:28.2162697" } } 'modules/nf-core/ensemblvep/vep/tests/nextflow.config' is unchanged diff --git a/modules/nf-core/ensemblvep/vep/environment.yml b/modules/nf-core/ensemblvep/vep/environment.yml index 283a45bb..3d36eb17 100644 --- a/modules/nf-core/ensemblvep/vep/environment.yml +++ b/modules/nf-core/ensemblvep/vep/environment.yml @@ -2,4 +2,4 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::ensembl-vep=112.0 + - bioconda::ensembl-vep=113.0 diff --git a/modules/nf-core/ensemblvep/vep/main.nf b/modules/nf-core/ensemblvep/vep/main.nf index 654dfec9..4da9598d 100644 --- a/modules/nf-core/ensemblvep/vep/main.nf +++ b/modules/nf-core/ensemblvep/vep/main.nf @@ -17,22 +17,25 @@ process ENSEMBLVEP_VEP { path extra_files output: - tuple val(meta), path("*.vcf.gz") , optional:true, emit: vcf - tuple val(meta), path("*.tab.gz") , optional:true, emit: tab - tuple val(meta), path("*.json.gz") , optional:true, emit: json - path "*.html" , optional:true, emit: report - path "versions.yml" , emit: versions + tuple val(meta), path("*.vcf.gz") , optional:true, emit: vcf + tuple val(meta), path("*.vcf.gz.tbi") , optional:true, emit: tbi + tuple val(meta), 
path("*.tab.gz") , optional:true, emit: tab + tuple val(meta), path("*.json.gz") , optional:true, emit: json + path "*.html" , optional:true, emit: report + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' def file_extension = args.contains("--vcf") ? 'vcf' : args.contains("--json")? 'json' : args.contains("--tab")? 'tab' : 'vcf' def compress_cmd = args.contains("--compress_output") ? '' : '--compress_output bgzip' def prefix = task.ext.prefix ?: "${meta.id}" def dir_cache = cache ? "\${PWD}/${cache}" : "/.vep" def reference = fasta ? "--fasta $fasta" : "" + def create_index = file_extension == "vcf" ? "tabix ${args2} ${prefix}.${file_extension}.gz" : "" """ vep \\ -i $vcf \\ @@ -47,24 +50,28 @@ process ENSEMBLVEP_VEP { --dir_cache $dir_cache \\ --fork $task.cpus + ${create_index} cat <<-END_VERSIONS > versions.yml "${task.process}": ensemblvep: \$( echo \$(vep --help 2>&1) | sed 's/^.*Versions:.*ensembl-vep : //;s/ .*\$//') + tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//') END_VERSIONS """ stub: def prefix = task.ext.prefix ?: "${meta.id}" + def file_extension = args.contains("--vcf") ? 'vcf' : args.contains("--json")? 'json' : args.contains("--tab")? 'tab' : 'vcf' + def create_index = file_extension == "vcf" ? 
"touch ${prefix}.${file_extension}.gz.tbi" : "" """ - echo "" | gzip > ${prefix}.vcf.gz - echo "" | gzip > ${prefix}.tab.gz - echo "" | gzip > ${prefix}.json.gz + echo "" | gzip > ${prefix}.${file_extension}.gz + ${create_index} touch ${prefix}_summary.html cat <<-END_VERSIONS > versions.yml "${task.process}": ensemblvep: \$( echo \$(vep --help 2>&1) | sed 's/^.*Versions:.*ensembl-vep : //;s/ .*\$//') + tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//') END_VERSIONS """ } diff --git a/modules/nf-core/ensemblvep/vep/meta.yml b/modules/nf-core/ensemblvep/vep/meta.yml index 9288a938..cbb194fd 100644 --- a/modules/nf-core/ensemblvep/vep/meta.yml +++ b/modules/nf-core/ensemblvep/vep/meta.yml @@ -63,15 +63,24 @@ input: output: - vcf: - meta: - type: file + type: map description: | - annotated vcf (optional) - pattern: "*.ann.vcf.gz" + Map with sample information - "*.vcf.gz": type: file description: | annotated vcf (optional) - pattern: "*.ann.vcf.gz" + pattern: "*.vcf.gz" + - tbi: + - meta: + type: map + description: | + Map with sample information + - "*.vcf.gz.tbi": + type: file + description: | + annotated vcf index (optional) + pattern: "*.vcf.gz.tbi" - tab: - meta: type: file diff --git a/modules/nf-core/ensemblvep/vep/tests/main.nf.test b/modules/nf-core/ensemblvep/vep/tests/main.nf.test index f66e867e..8b0807bb 100644 --- a/modules/nf-core/ensemblvep/vep/tests/main.nf.test +++ b/modules/nf-core/ensemblvep/vep/tests/main.nf.test @@ -11,7 +11,7 @@ nextflow_process { tag "ensemblvep/vep" tag "ensemblvep/download" - test("test_ensemblvep_vep_fasta_vcf") { + test("test_ensemblvep_vep_fasta_vcf - stub (not really but linting complains otherwise)") { config "./vcf.config" setup { @@ -21,7 +21,7 @@ nextflow_process { process { """ input[0] = Channel.of([ - [id:"112_WBcel235"], + [id:"113_WBcel235"], params.vep_genome, params.vep_species, params.vep_cache_version @@ -55,8 +55,11 @@ nextflow_process { then { assertAll( { assert process.success }, - { 
assert snapshot(process.out.versions).match() }, - { assert path(process.out.vcf.get(0).get(1)).linesGzip.contains("##fileformat=VCFv4.2") } + { assert snapshot( + process.out.versions, + path(process.out.vcf.get(0).get(1)).vcf.variantsMD5, + file(process.out.tbi.get(0).get(1)).name + ).match() } ) } @@ -72,7 +75,7 @@ nextflow_process { process { """ input[0] = Channel.of([ - [id:"112_WBcel235"], + [id:"113_WBcel235"], params.vep_genome, params.vep_species, params.vep_cache_version @@ -111,4 +114,4 @@ nextflow_process { ) } } -} +} \ No newline at end of file diff --git a/modules/nf-core/ensemblvep/vep/tests/main.nf.test.snap b/modules/nf-core/ensemblvep/vep/tests/main.nf.test.snap index 2d215500..cc51d787 100644 --- a/modules/nf-core/ensemblvep/vep/tests/main.nf.test.snap +++ b/modules/nf-core/ensemblvep/vep/tests/main.nf.test.snap @@ -2,25 +2,27 @@ "test_ensemblvep_vep_fasta_tab_gz": { "content": [ [ - "versions.yml:md5,c6d58a35e7be5e6ab46a3f9757f6e259" + "versions.yml:md5,4faee762040f2f0241cdb8b48a2ac759" ] ], "meta": { "nf-test": "0.9.1", - "nextflow": "24.10.0" + "nextflow": "24.10.3" }, - "timestamp": "2024-11-20T14:10:59.846254319" + "timestamp": "2025-01-24T14:46:45.902677788" }, - "test_ensemblvep_vep_fasta_vcf": { + "test_ensemblvep_vep_fasta_vcf - stub (not really but linting complains otherwise)": { "content": [ [ - "versions.yml:md5,c6d58a35e7be5e6ab46a3f9757f6e259" - ] + "versions.yml:md5,4faee762040f2f0241cdb8b48a2ac759" + ], + "d41d8cd98f00b204e9800998ecf8427e", + "test.vcf.gz.tbi" ], "meta": { "nf-test": "0.9.1", - "nextflow": "24.10.0" + "nextflow": "24.10.3" }, - "timestamp": "2024-11-20T14:10:44.092773407" + "timestamp": "2025-01-24T14:46:27.812227223" } } \ No newline at end of file diff --git a/modules/nf-core/ensemblvep/vep/tests/nextflow.config b/modules/nf-core/ensemblvep/vep/tests/nextflow.config index 9aa48164..0a4ae1a6 100644 --- a/modules/nf-core/ensemblvep/vep/tests/nextflow.config +++ 
b/modules/nf-core/ensemblvep/vep/tests/nextflow.config @@ -1,5 +1,5 @@ params { - vep_cache_version = "112" + vep_cache_version = "113" vep_genome = "WBcel235" vep_species = "caenorhabditis_elegans" } diff --git a/modules/nf-core/tabix/bgzip/environment.yml b/modules/nf-core/tabix/bgzip/environment.yml deleted file mode 100644 index 017c259d..00000000 --- a/modules/nf-core/tabix/bgzip/environment.yml +++ /dev/null @@ -1,7 +0,0 @@ -channels: - - conda-forge - - bioconda - -dependencies: - - bioconda::htslib=1.20 - - bioconda::tabix=1.11 diff --git a/modules/nf-core/tabix/bgzip/main.nf b/modules/nf-core/tabix/bgzip/main.nf deleted file mode 100644 index 67991c74..00000000 --- a/modules/nf-core/tabix/bgzip/main.nf +++ /dev/null @@ -1,55 +0,0 @@ -process TABIX_BGZIP { - tag "$meta.id" - label 'process_single' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/htslib:1.20--h5efdd21_2' : - 'biocontainers/htslib:1.20--h5efdd21_2' }" - - input: - tuple val(meta), path(input) - - output: - tuple val(meta), path("${output}") , emit: output - tuple val(meta), path("${output}.gzi"), emit: gzi, optional: true - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - prefix = task.ext.prefix ?: "${meta.id}" - in_bgzip = ["gz", "bgz", "bgzf"].contains(input.getExtension()) - extension = in_bgzip ? input.getBaseName().tokenize(".")[-1] : input.getExtension() - output = in_bgzip ? "${prefix}.${extension}" : "${prefix}.${extension}.gz" - command = in_bgzip ? 
'-d' : '' - // Name the index according to $prefix, unless a name has been requested - if ((args.matches("(^| )-i\\b") || args.matches("(^| )--index(\$| )")) && !args.matches("(^| )-I\\b") && !args.matches("(^| )--index-name\\b")) { - args = args + " -I ${output}.gzi" - } - """ - bgzip $command -c $args -@${task.cpus} $input > ${output} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//') - END_VERSIONS - """ - - stub: - prefix = task.ext.prefix ?: "${meta.id}" - in_bgzip = ["gz", "bgz", "bgzf"].contains(input.getExtension()) - output = in_bgzip ? input.getBaseName() : "${prefix}.${input.getExtension()}.gz" - - """ - echo "" | gzip > ${output} - touch ${output}.gzi - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/tabix/bgzip/meta.yml b/modules/nf-core/tabix/bgzip/meta.yml deleted file mode 100644 index 131e92cf..00000000 --- a/modules/nf-core/tabix/bgzip/meta.yml +++ /dev/null @@ -1,61 +0,0 @@ -name: tabix_bgzip -description: Compresses/decompresses files -keywords: - - compress - - decompress - - bgzip - - tabix -tools: - - bgzip: - description: | - Bgzip compresses or decompresses files in a similar manner to, and compatible with, gzip. - homepage: https://www.htslib.org/doc/tabix.html - documentation: http://www.htslib.org/doc/bgzip.html - doi: 10.1093/bioinformatics/btp352 - licence: ["MIT"] - identifier: biotools:tabix -input: - - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - input: - type: file - description: file to compress or to decompress -output: - - output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - ${output}: - type: file - description: Output compressed/decompressed file - pattern: "*." - - gzi: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - ${output}.gzi: - type: file - description: Optional gzip index file for compressed inputs - pattern: "*.gzi" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@joseespinosa" - - "@drpatelh" - - "@maxulysse" - - "@nvnieuwk" -maintainers: - - "@joseespinosa" - - "@drpatelh" - - "@maxulysse" - - "@nvnieuwk" diff --git a/modules/nf-core/tabix/bgzip/tests/bgzip_compress.config b/modules/nf-core/tabix/bgzip/tests/bgzip_compress.config deleted file mode 100644 index 6b6ff55f..00000000 --- a/modules/nf-core/tabix/bgzip/tests/bgzip_compress.config +++ /dev/null @@ -1,5 +0,0 @@ -process { - withName: TABIX_BGZIP { - ext.args = ' -i' - } -} diff --git a/modules/nf-core/tabix/bgzip/tests/main.nf.test b/modules/nf-core/tabix/bgzip/tests/main.nf.test deleted file mode 100644 index d784aa07..00000000 --- a/modules/nf-core/tabix/bgzip/tests/main.nf.test +++ /dev/null @@ -1,111 +0,0 @@ -nextflow_process { - - name "Test Process TABIX_BGZIP" - script "modules/nf-core/tabix/bgzip/main.nf" - process "TABIX_BGZIP" - - tag "modules" - tag "modules_nfcore" - tag "tabix" - tag "tabix/bgzip" - - test("sarscov2_vcf_bgzip_compress") { - when { - process { - """ - input[0] = [ - [ id:'bgzip_test' ], - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true) ] - ] - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert snapshot(process.out).match() }, - { assert snapshot( - file(process.out.output[0][1]).name - ).match("bgzip_test") - } - ) - } - } - - test("homo_genome_bedgz_compress") { - when { - process { - """ - input[0] = [ - [ id:'bedgz_test' ], - [ 
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed.gz', checkIfExists: true) ] - ] - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert snapshot(process.out).match() }, - { assert snapshot( - file(process.out.output[0][1]).name - ).match("bedgz_test") - } - ) - } - } - - test("sarscov2_vcf_bgzip_compress_stub") { - options '-stub' - config "./bgzip_compress.config" - - when { - process { - """ - input[0] = [ - [ id:"test_stub" ], - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true) ] - ] - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert snapshot(process.out).match() }, - { assert snapshot( - file(process.out.output[0][1]).name - ).match("test_stub") - } - ) - } - } - - test("sarscov2_vcf_bgzip_compress_gzi") { - config "./bgzip_compress.config" - when { - process { - """ - input[0] = [ - [ id:"gzi_compress_test" ], - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true) ] - ] - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert snapshot(process.out).match() }, - { assert snapshot( - file(process.out.gzi[0][1]).name - ).match("gzi_compress_test") - } - ) - } - } -} diff --git a/modules/nf-core/tabix/bgzip/tests/main.nf.test.snap b/modules/nf-core/tabix/bgzip/tests/main.nf.test.snap deleted file mode 100644 index 0748143f..00000000 --- a/modules/nf-core/tabix/bgzip/tests/main.nf.test.snap +++ /dev/null @@ -1,218 +0,0 @@ -{ - "gzi_compress_test": { - "content": [ - "gzi_compress_test.vcf.gz.gzi" - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" - }, - "timestamp": "2024-02-19T14:52:29.328146" - }, - "homo_genome_bedgz_compress": { - "content": [ - { - "0": [ - [ - { - "id": "bedgz_test" - }, - "bedgz_test.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7" - ] - ], - "1": [ - - ], - "2": [ - "versions.yml:md5,753c29916d45debdde52f4ac7c745f61" - 
], - "gzi": [ - - ], - "output": [ - [ - { - "id": "bedgz_test" - }, - "bedgz_test.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7" - ] - ], - "versions": [ - "versions.yml:md5,753c29916d45debdde52f4ac7c745f61" - ] - } - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" - }, - "timestamp": "2024-07-19T11:28:34.159992362" - }, - "test_stub": { - "content": [ - "test_stub.vcf.gz" - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" - }, - "timestamp": "2024-02-19T14:52:20.811489" - }, - "sarscov2_vcf_bgzip_compress": { - "content": [ - { - "0": [ - [ - { - "id": "bgzip_test" - }, - "bgzip_test.vcf.gz:md5,8e722884ffb75155212a3fc053918766" - ] - ], - "1": [ - - ], - "2": [ - "versions.yml:md5,753c29916d45debdde52f4ac7c745f61" - ], - "gzi": [ - - ], - "output": [ - [ - { - "id": "bgzip_test" - }, - "bgzip_test.vcf.gz:md5,8e722884ffb75155212a3fc053918766" - ] - ], - "versions": [ - "versions.yml:md5,753c29916d45debdde52f4ac7c745f61" - ] - } - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" - }, - "timestamp": "2024-07-19T11:28:22.087769106" - }, - "sarscov2_vcf_bgzip_compress_gzi": { - "content": [ - { - "0": [ - [ - { - "id": "gzi_compress_test" - }, - "gzi_compress_test.vcf.gz:md5,8e722884ffb75155212a3fc053918766" - ] - ], - "1": [ - [ - { - "id": "gzi_compress_test" - }, - "gzi_compress_test.vcf.gz.gzi:md5,26fd00d4e26141cd11561f6e7d4a2ad0" - ] - ], - "2": [ - "versions.yml:md5,753c29916d45debdde52f4ac7c745f61" - ], - "gzi": [ - [ - { - "id": "gzi_compress_test" - }, - "gzi_compress_test.vcf.gz.gzi:md5,26fd00d4e26141cd11561f6e7d4a2ad0" - ] - ], - "output": [ - [ - { - "id": "gzi_compress_test" - }, - "gzi_compress_test.vcf.gz:md5,8e722884ffb75155212a3fc053918766" - ] - ], - "versions": [ - "versions.yml:md5,753c29916d45debdde52f4ac7c745f61" - ] - } - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" - }, - "timestamp": "2024-07-19T11:28:57.15091665" - }, - "bgzip_test": { - "content": [ - "bgzip_test.vcf.gz" - ], - "meta": { - 
"nf-test": "0.8.4", - "nextflow": "24.04.2" - }, - "timestamp": "2024-02-19T14:52:03.768295" - }, - "bedgz_test": { - "content": [ - "bedgz_test.bed" - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" - }, - "timestamp": "2024-02-19T14:52:12.453855" - }, - "sarscov2_vcf_bgzip_compress_stub": { - "content": [ - { - "0": [ - [ - { - "id": "test_stub" - }, - "test_stub.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" - ] - ], - "1": [ - [ - { - "id": "test_stub" - }, - "test_stub.vcf.gz.gzi:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "2": [ - "versions.yml:md5,753c29916d45debdde52f4ac7c745f61" - ], - "gzi": [ - [ - { - "id": "test_stub" - }, - "test_stub.vcf.gz.gzi:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "output": [ - [ - { - "id": "test_stub" - }, - "test_stub.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" - ] - ], - "versions": [ - "versions.yml:md5,753c29916d45debdde52f4ac7c745f61" - ] - } - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" - }, - "timestamp": "2024-07-19T11:28:45.219404786" - } -} \ No newline at end of file diff --git a/modules/nf-core/tabix/bgzip/tests/tags.yml b/modules/nf-core/tabix/bgzip/tests/tags.yml deleted file mode 100644 index de0eec86..00000000 --- a/modules/nf-core/tabix/bgzip/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -tabix/bgzip: - - "modules/nf-core/tabix/bgzip/**" diff --git a/modules/nf-core/tabix/bgzip/tests/vcf_none.config b/modules/nf-core/tabix/bgzip/tests/vcf_none.config deleted file mode 100644 index f3a3c467..00000000 --- a/modules/nf-core/tabix/bgzip/tests/vcf_none.config +++ /dev/null @@ -1,5 +0,0 @@ -process { - withName: TABIX_BGZIP { - ext.args = '' - } -} diff --git a/modules/nf-core/tabix/bgziptabix/environment.yml b/modules/nf-core/tabix/bgziptabix/environment.yml deleted file mode 100644 index 017c259d..00000000 --- a/modules/nf-core/tabix/bgziptabix/environment.yml +++ /dev/null @@ -1,7 +0,0 @@ -channels: - - conda-forge - - bioconda - -dependencies: - - bioconda::htslib=1.20 - - 
bioconda::tabix=1.11 diff --git a/modules/nf-core/tabix/bgziptabix/main.nf b/modules/nf-core/tabix/bgziptabix/main.nf deleted file mode 100644 index 22f37a77..00000000 --- a/modules/nf-core/tabix/bgziptabix/main.nf +++ /dev/null @@ -1,48 +0,0 @@ -process TABIX_BGZIPTABIX { - tag "$meta.id" - label 'process_single' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/htslib:1.20--h5efdd21_2' : - 'biocontainers/htslib:1.20--h5efdd21_2' }" - - input: - tuple val(meta), path(input) - - output: - tuple val(meta), path("*.gz"), path("*.tbi"), optional: true, emit: gz_tbi - tuple val(meta), path("*.gz"), path("*.csi"), optional: true, emit: gz_csi - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def args2 = task.ext.args2 ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - """ - bgzip --threads ${task.cpus} -c $args $input > ${prefix}.${input.getExtension()}.gz - tabix --threads ${task.cpus} $args2 ${prefix}.${input.getExtension()}.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//') - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - def args2 = task.ext.args2 ?: '' - def index = args2.contains("-C ") || args2.contains("--csi") ? 
"csi" : "tbi" - """ - echo "" | gzip > ${prefix}.${input.getExtension()}.gz - touch ${prefix}.${input.getExtension()}.gz.${index} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/tabix/bgziptabix/meta.yml b/modules/nf-core/tabix/bgziptabix/meta.yml deleted file mode 100644 index 806fbc12..00000000 --- a/modules/nf-core/tabix/bgziptabix/meta.yml +++ /dev/null @@ -1,65 +0,0 @@ -name: tabix_bgziptabix -description: bgzip a sorted tab-delimited genome file and then create tabix index -keywords: - - bgzip - - compress - - index - - tabix - - vcf -tools: - - tabix: - description: Generic indexer for TAB-delimited genome position files. - homepage: https://www.htslib.org/doc/tabix.html - documentation: https://www.htslib.org/doc/tabix.1.html - doi: 10.1093/bioinformatics/btq671 - licence: ["MIT"] - identifier: biotools:tabix -input: - - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - input: - type: file - description: Sorted tab-delimited genome file -output: - - gz_tbi: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - "*.gz": - type: file - description: bgzipped tab-delimited genome file - pattern: "*.gz" - - "*.tbi": - type: file - description: tabix index file - pattern: "*.tbi" - - gz_csi: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - "*.gz": - type: file - description: bgzipped tab-delimited genome file - pattern: "*.gz" - - "*.csi": - type: file - description: csi index file - pattern: "*.csi" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@maxulysse" - - "@DLBPointon" -maintainers: - - "@maxulysse" - - "@DLBPointon" diff --git a/modules/nf-core/tabix/bgziptabix/tests/main.nf.test b/modules/nf-core/tabix/bgziptabix/tests/main.nf.test deleted file mode 100644 index 4d4130dc..00000000 --- a/modules/nf-core/tabix/bgziptabix/tests/main.nf.test +++ /dev/null @@ -1,123 +0,0 @@ -nextflow_process { - - name "Test Process TABIX_BGZIPTABIX" - script "modules/nf-core/tabix/bgziptabix/main.nf" - process "TABIX_BGZIPTABIX" - - tag "modules" - tag "modules_nfcore" - tag "tabix" - tag "tabix/bgziptabix" - - test("sarscov2_bed_tbi") { - config "./tabix_tbi.config" - - when { - process { - """ - input[0] = [ - [ id:'tbi_test' ], - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) ] - ] - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert snapshot(process.out).match() }, - { assert snapshot( - file(process.out.gz_tbi[0][1]).name - ).match("tbi_test") - } - ) - } - } - - test("sarscov2_bed_csi") { - config "./tabix_csi.config" - - when { - process { - """ - input[0] = [ - [ id:'csi_test' ], - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) ] - ] - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert snapshot(process.out).match() }, - { assert snapshot( - file(process.out.gz_csi[0][1]).name - ).match("csi_test") - } - ) - } - - } - - test("sarscov2_bed_csi_stub") { - config "./tabix_csi.config" - - options "-stub" - - when { - process { - """ - input[0] = [ - [ id:'test' ], - [ file(params.modules_testdata_base_path + 
'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) ] - ] - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert snapshot(process.out).match() }, - { assert snapshot( - file(process.out.gz_csi[0][1]).name - ).match("csi_stub") - } - ) - } - - } - - test("sarscov2_bed_tbi_stub") { - config "./tabix_tbi.config" - - options "-stub" - - when { - process { - """ - input[0] = [ - [ id:'test' ], - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) ] - ] - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert snapshot(process.out).match() }, - { assert snapshot( - file(process.out.gz_tbi[0][1]).name - ).match("tbi_stub") - } - ) - } - - } - -} diff --git a/modules/nf-core/tabix/bgziptabix/tests/main.nf.test.snap b/modules/nf-core/tabix/bgziptabix/tests/main.nf.test.snap deleted file mode 100644 index fb87799b..00000000 --- a/modules/nf-core/tabix/bgziptabix/tests/main.nf.test.snap +++ /dev/null @@ -1,206 +0,0 @@ -{ - "sarscov2_bed_tbi": { - "content": [ - { - "0": [ - [ - { - "id": "tbi_test" - }, - "tbi_test.bed.gz:md5,fe4053cf4de3aebbdfc3be2efb125a74", - "tbi_test.bed.gz.tbi:md5,ca06caf88b1e3c67d5fcba0a1460b52c" - ] - ], - "1": [ - - ], - "2": [ - "versions.yml:md5,736e7c3b16a3ac525253e5b5f5d8fdfa" - ], - "gz_csi": [ - - ], - "gz_tbi": [ - [ - { - "id": "tbi_test" - }, - "tbi_test.bed.gz:md5,fe4053cf4de3aebbdfc3be2efb125a74", - "tbi_test.bed.gz.tbi:md5,ca06caf88b1e3c67d5fcba0a1460b52c" - ] - ], - "versions": [ - "versions.yml:md5,736e7c3b16a3ac525253e5b5f5d8fdfa" - ] - } - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" - }, - "timestamp": "2024-07-19T11:29:16.053817543" - }, - "sarscov2_bed_csi": { - "content": [ - { - "0": [ - - ], - "1": [ - [ - { - "id": "csi_test" - }, - "csi_test.bed.gz:md5,fe4053cf4de3aebbdfc3be2efb125a74", - "csi_test.bed.gz.csi:md5,c9c0377de58fdc89672bb3005a0d69f5" - ] - ], - "2": [ - 
"versions.yml:md5,736e7c3b16a3ac525253e5b5f5d8fdfa" - ], - "gz_csi": [ - [ - { - "id": "csi_test" - }, - "csi_test.bed.gz:md5,fe4053cf4de3aebbdfc3be2efb125a74", - "csi_test.bed.gz.csi:md5,c9c0377de58fdc89672bb3005a0d69f5" - ] - ], - "gz_tbi": [ - - ], - "versions": [ - "versions.yml:md5,736e7c3b16a3ac525253e5b5f5d8fdfa" - ] - } - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" - }, - "timestamp": "2024-07-19T11:29:27.667745444" - }, - "csi_test": { - "content": [ - "csi_test.bed.gz" - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" - }, - "timestamp": "2024-02-19T14:51:00.548801" - }, - "sarscov2_bed_tbi_stub": { - "content": [ - { - "0": [ - [ - { - "id": "test" - }, - "test.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940", - "test.bed.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - - ], - "2": [ - "versions.yml:md5,736e7c3b16a3ac525253e5b5f5d8fdfa" - ], - "gz_csi": [ - - ], - "gz_tbi": [ - [ - { - "id": "test" - }, - "test.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940", - "test.bed.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions": [ - "versions.yml:md5,736e7c3b16a3ac525253e5b5f5d8fdfa" - ] - } - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" - }, - "timestamp": "2024-09-25T14:45:18.533169949" - }, - "csi_stub": { - "content": [ - "test.bed.gz" - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" - }, - "timestamp": "2024-02-19T14:51:09.218454" - }, - "tbi_stub": { - "content": [ - "test.bed.gz" - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" - }, - "timestamp": "2024-09-25T14:45:18.550930179" - }, - "tbi_test": { - "content": [ - "tbi_test.bed.gz" - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" - }, - "timestamp": "2024-02-19T14:50:51.579654" - }, - "sarscov2_bed_csi_stub": { - "content": [ - { - "0": [ - - ], - "1": [ - [ - { - "id": "test" - }, - "test.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940", - "test.bed.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" 
- ] - ], - "2": [ - "versions.yml:md5,736e7c3b16a3ac525253e5b5f5d8fdfa" - ], - "gz_csi": [ - [ - { - "id": "test" - }, - "test.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940", - "test.bed.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "gz_tbi": [ - - ], - "versions": [ - "versions.yml:md5,736e7c3b16a3ac525253e5b5f5d8fdfa" - ] - } - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" - }, - "timestamp": "2024-09-25T14:44:19.786135972" - } -} \ No newline at end of file diff --git a/modules/nf-core/tabix/bgziptabix/tests/tabix_csi.config b/modules/nf-core/tabix/bgziptabix/tests/tabix_csi.config deleted file mode 100644 index fb41a314..00000000 --- a/modules/nf-core/tabix/bgziptabix/tests/tabix_csi.config +++ /dev/null @@ -1,5 +0,0 @@ -process { - withName: TABIX_BGZIPTABIX { - ext.args2 = '-p vcf --csi' - } -} diff --git a/modules/nf-core/tabix/bgziptabix/tests/tabix_tbi.config b/modules/nf-core/tabix/bgziptabix/tests/tabix_tbi.config deleted file mode 100644 index c1915dc4..00000000 --- a/modules/nf-core/tabix/bgziptabix/tests/tabix_tbi.config +++ /dev/null @@ -1,5 +0,0 @@ -process { - withName: TABIX_BGZIPTABIX { - ext.args2 = '-p vcf' - } -} \ No newline at end of file diff --git a/modules/nf-core/tabix/bgziptabix/tests/tags.yml b/modules/nf-core/tabix/bgziptabix/tests/tags.yml deleted file mode 100644 index 5052b4d7..00000000 --- a/modules/nf-core/tabix/bgziptabix/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -tabix/bgziptabix: - - "modules/nf-core/tabix/bgziptabix/**" diff --git a/modules/nf-core/vcfanno/environment.yml b/modules/nf-core/vcfanno/environment.yml index 32c48b15..da0f73e4 100644 --- a/modules/nf-core/vcfanno/environment.yml +++ b/modules/nf-core/vcfanno/environment.yml @@ -1,5 +1,7 @@ channels: - conda-forge - bioconda + dependencies: + - bioconda::htslib=1.21 - bioconda::vcfanno=0.3.5 diff --git a/modules/nf-core/vcfanno/main.nf b/modules/nf-core/vcfanno/main.nf index 25c131b1..c2274d75 100644 --- a/modules/nf-core/vcfanno/main.nf 
+++ b/modules/nf-core/vcfanno/main.nf @@ -4,8 +4,8 @@ process VCFANNO { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/vcfanno:0.3.5--h9ee0642_0': - 'biocontainers/vcfanno:0.3.5--h9ee0642_0' }" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/d6/d6a1af15acc0fbec648812e07ccb4c1c39a926f3a98031a50f51c5b859e543e1/data': + 'community.wave.seqera.io/library/htslib_vcfanno:398cde9953538855' }" input: tuple val(meta), path(vcf), path(tbi), path(specific_resources) @@ -14,15 +14,18 @@ process VCFANNO { path resources output: - tuple val(meta), path("*.vcf") , emit: vcf - path "versions.yml" , emit: versions + tuple val(meta), path("*.vcf.gz") , emit: vcf + tuple val(meta), path("*.vcf.gz.tbi") , emit: tbi + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def args3 = task.ext.args3 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" def lua_cmd = lua ? "--lua ${lua}" : "" """ vcfanno \\ @@ -31,7 +34,9 @@ process VCFANNO { ${lua_cmd} \\ ${toml} \\ ${vcf} \\ - > ${prefix}.vcf + | bgzip ${args2} --threads ${task.cpus} \\ + > ${prefix}.vcf.gz \\ + && tabix ${args3} ${prefix}.vcf.gz cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -42,7 +47,8 @@ process VCFANNO { stub: def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${prefix}.vcf + touch ${prefix}.vcf.gz + touch ${prefix}.vcf.gz.tbi cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/vcfanno/meta.yml b/modules/nf-core/vcfanno/meta.yml index 18d27127..2f5b3add 100644 --- a/modules/nf-core/vcfanno/meta.yml +++ b/modules/nf-core/vcfanno/meta.yml @@ -53,10 +53,26 @@ output: description: | Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] - - "*.vcf": - type: file - description: Annotated VCF file - pattern: "*.vcf" + pattern: "*.vcf.gz" + - "*.vcf.gz": + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.vcf.gz" + - tbi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.vcf.gz.tbi" + - "*.vcf.gz.tbi": + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.vcf.gz.tbi" - versions: - versions.yml: type: file @@ -65,6 +81,8 @@ output: authors: - "@projectoriented" - "@matthdsm" + - "@nvnieuwk" maintainers: - "@projectoriented" - "@matthdsm" + - "@nvnieuwk" diff --git a/modules/nf-core/vcfanno/tests/main.nf.test.snap b/modules/nf-core/vcfanno/tests/main.nf.test.snap index 7e5f737c..ca4d76d2 100644 --- a/modules/nf-core/vcfanno/tests/main.nf.test.snap +++ b/modules/nf-core/vcfanno/tests/main.nf.test.snap @@ -1,12 +1,16 @@ { "sarscov2 - [vcf(gz), tbi, vcf], [], toml, [vcf, tbi] - stub": { "content": [ - "test_compressed.vcf", + "test_compressed.vcf.gz", [ "versions.yml:md5,5ff0991b612706ce15d82eb1564513b0" ] ], - "timestamp": "2023-12-06T12:18:25.69588598" + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.3" + }, + "timestamp": "2025-01-23T14:03:21.974046858" }, "sarscov2 - [vcf(gz), tbi, vcf], [], toml, [vcf, tbi]": { "content": [ @@ -17,19 +21,37 @@ "id": "test_compressed", "single_end": false }, - "test_compressed.vcf:md5,d3cf5a6eaf6cca5b957833a313c5fbf4" + "test_compressed.vcf.gz:md5,d3cf5a6eaf6cca5b957833a313c5fbf4" ] ], "1": [ + [ + { + "id": "test_compressed", + "single_end": false + }, + "test_compressed.vcf.gz.tbi:md5,67a4272d5897fea5cc395dc87afc3629" + ] + ], + "2": [ "versions.yml:md5,5ff0991b612706ce15d82eb1564513b0" ], + "tbi": [ + [ + { + "id": "test_compressed", + "single_end": false + }, + 
"test_compressed.vcf.gz.tbi:md5,67a4272d5897fea5cc395dc87afc3629" + ] + ], "vcf": [ [ { "id": "test_compressed", "single_end": false }, - "test_compressed.vcf:md5,d3cf5a6eaf6cca5b957833a313c5fbf4" + "test_compressed.vcf.gz:md5,d3cf5a6eaf6cca5b957833a313c5fbf4" ] ], "versions": [ @@ -37,7 +59,11 @@ ] } ], - "timestamp": "2023-12-06T12:21:13.209704154" + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.3" + }, + "timestamp": "2025-01-23T14:04:38.599392384" }, "sarscov2 - [vcf, [], vcf], [], toml, [vcf, tbi]": { "content": [ @@ -48,19 +74,37 @@ "id": "test_uncompressed", "single_end": false }, - "test_uncompressed.vcf:md5,d3cf5a6eaf6cca5b957833a313c5fbf4" + "test_uncompressed.vcf.gz:md5,d3cf5a6eaf6cca5b957833a313c5fbf4" ] ], "1": [ + [ + { + "id": "test_uncompressed", + "single_end": false + }, + "test_uncompressed.vcf.gz.tbi:md5,67a4272d5897fea5cc395dc87afc3629" + ] + ], + "2": [ "versions.yml:md5,5ff0991b612706ce15d82eb1564513b0" ], + "tbi": [ + [ + { + "id": "test_uncompressed", + "single_end": false + }, + "test_uncompressed.vcf.gz.tbi:md5,67a4272d5897fea5cc395dc87afc3629" + ] + ], "vcf": [ [ { "id": "test_uncompressed", "single_end": false }, - "test_uncompressed.vcf:md5,d3cf5a6eaf6cca5b957833a313c5fbf4" + "test_uncompressed.vcf.gz:md5,d3cf5a6eaf6cca5b957833a313c5fbf4" ] ], "versions": [ @@ -68,6 +112,10 @@ ] } ], - "timestamp": "2023-12-06T12:21:19.255212216" + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.3" + }, + "timestamp": "2025-01-23T14:04:48.104846161" } } \ No newline at end of file diff --git a/nextflow.config b/nextflow.config index 46ab76ca..669bd9f6 100644 --- a/nextflow.config +++ b/nextflow.config @@ -277,7 +277,7 @@ manifest { description = """A nextflow pipeline for calling and annotating small germline variants from short DNA reads for WES and WGS data""" mainScript = 'main.nf' nextflowVersion = '!>=24.10.0' - version = '1.9.3' + version = '1.10.0dev' doi = '' } diff --git a/subworkflows/local/bam_call_elprep/main.nf 
b/subworkflows/local/bam_call_elprep/main.nf index 11c8180c..0d120aea 100644 --- a/subworkflows/local/bam_call_elprep/main.nf +++ b/subworkflows/local/bam_call_elprep/main.nf @@ -37,8 +37,7 @@ workflow BAM_CALL_ELPREP { ch_versions = ch_versions.mix(ELPREP_FILTER.out.versions.first()) VCF_CONCAT_BCFTOOLS( - ELPREP_FILTER.out.gvcf, - true + ELPREP_FILTER.out.gvcf ) ch_versions = ch_versions.mix(VCF_CONCAT_BCFTOOLS.out.versions) diff --git a/subworkflows/local/bam_call_elprep/tests/main.nf.test.snap b/subworkflows/local/bam_call_elprep/tests/main.nf.test.snap index 2d85d7f1..002917cb 100644 --- a/subworkflows/local/bam_call_elprep/tests/main.nf.test.snap +++ b/subworkflows/local/bam_call_elprep/tests/main.nf.test.snap @@ -4,11 +4,15 @@ [ [ { - "id": "NA24143", - "sample": "NA24143", - "family": "Ashkenazim", - "family_samples": "NA24143", - "caller": "elprep" + "groupSize": 3, + "groupTarget": { + "id": "NA24143", + "sample": "NA24143", + "family": "Ashkenazim", + "family_samples": "NA24143", + "split_count": 3, + "caller": "elprep" + } }, "variantsMD5:974ed65cfad6264db7c6589d6b7d7d74", "g.vcf.gz.tbi" @@ -17,11 +21,15 @@ [ [ { - "id": "NA24143", - "sample": "NA24143", - "family": "Ashkenazim", - "family_samples": "NA24143", - "caller": "elprep" + "groupSize": 3, + "groupTarget": { + "id": "NA24143", + "sample": "NA24143", + "family": "Ashkenazim", + "family_samples": "NA24143", + "split_count": 3, + "caller": "elprep" + } }, "NA24143.elprep.bcftools_stats.txt:md5,36b9f979c03b24d87e2dc710baf3672b" ] @@ -29,20 +37,24 @@ ], "meta": { "nf-test": "0.9.1", - "nextflow": "24.10.0" + "nextflow": "24.10.3" }, - "timestamp": "2024-11-06T16:23:49.669427501" + "timestamp": "2025-01-23T17:21:58.291545093" }, "bam_call_elprep - default": { "content": [ [ [ { - "id": "NA24143", - "sample": "NA24143", - "family": "Ashkenazim", - "family_samples": "NA24143", - "caller": "elprep" + "groupSize": 3, + "groupTarget": { + "id": "NA24143", + "sample": "NA24143", + "family": "Ashkenazim", + 
"family_samples": "NA24143", + "split_count": 3, + "caller": "elprep" + } }, "variantsMD5:974ed65cfad6264db7c6589d6b7d7d74", "g.vcf.gz.tbi" @@ -51,11 +63,15 @@ [ [ { - "id": "NA24143", - "sample": "NA24143", - "family": "Ashkenazim", - "family_samples": "NA24143", - "caller": "elprep" + "groupSize": 3, + "groupTarget": { + "id": "NA24143", + "sample": "NA24143", + "family": "Ashkenazim", + "family_samples": "NA24143", + "split_count": 3, + "caller": "elprep" + } }, "NA24143.elprep.bcftools_stats.txt:md5,36b9f979c03b24d87e2dc710baf3672b" ] @@ -63,8 +79,8 @@ ], "meta": { "nf-test": "0.9.1", - "nextflow": "24.10.0" + "nextflow": "24.10.3" }, - "timestamp": "2024-11-06T16:23:17.425264939" + "timestamp": "2025-01-23T17:21:39.582489455" } } \ No newline at end of file diff --git a/subworkflows/local/bam_call_vardictjava/main.nf b/subworkflows/local/bam_call_vardictjava/main.nf index d24fe6d2..bc0df32d 100644 --- a/subworkflows/local/bam_call_vardictjava/main.nf +++ b/subworkflows/local/bam_call_vardictjava/main.nf @@ -1,12 +1,6 @@ include { VARDICTJAVA } from '../../../modules/nf-core/vardictjava/main' -include { TABIX_BGZIP } from '../../../modules/nf-core/tabix/bgzip/main' -include { BCFTOOLS_REHEADER } from '../../../modules/nf-core/bcftools/reheader/main' -include { VCFANNO } from '../../../modules/nf-core/vcfanno/main' -include { TABIX_TABIX } from '../../../modules/nf-core/tabix/tabix/main' -include { BCFTOOLS_STATS } from '../../../modules/nf-core/bcftools/stats/main' include { VCF_CONCAT_BCFTOOLS } from '../vcf_concat_bcftools/main' -include { VCF_FILTER_BCFTOOLS } from '../vcf_filter_bcftools/main' include { VCF_DBSNP_VCFANNO } from '../vcf_dbsnp_vcfanno/main' workflow BAM_CALL_VARDICTJAVA { @@ -31,8 +25,7 @@ workflow BAM_CALL_VARDICTJAVA { ch_versions = ch_versions.mix(VARDICTJAVA.out.versions.first()) VCF_CONCAT_BCFTOOLS( - VARDICTJAVA.out.vcf, - true + VARDICTJAVA.out.vcf ) ch_versions = ch_versions.mix(VCF_CONCAT_BCFTOOLS.out.versions) diff --git 
a/subworkflows/local/bam_call_vardictjava/tests/main.nf.test.snap b/subworkflows/local/bam_call_vardictjava/tests/main.nf.test.snap index 5d37ef35..b5abec94 100644 --- a/subworkflows/local/bam_call_vardictjava/tests/main.nf.test.snap +++ b/subworkflows/local/bam_call_vardictjava/tests/main.nf.test.snap @@ -8,6 +8,7 @@ "sample": "NA24143", "family": "Ashkenazim", "family_samples": "NA24143", + "split_count": 3, "caller": "vardict" }, "variantsMD5:98497d2c15c6e3781f5ddeb81bf6288f", @@ -17,8 +18,8 @@ ], "meta": { "nf-test": "0.9.1", - "nextflow": "24.10.0" + "nextflow": "24.10.3" }, - "timestamp": "2024-11-06T16:29:46.9755281" + "timestamp": "2025-01-23T17:23:57.944543761" } } \ No newline at end of file diff --git a/subworkflows/local/cram_call_gatk4/main.nf b/subworkflows/local/cram_call_gatk4/main.nf index a63564a2..75e3c32c 100644 --- a/subworkflows/local/cram_call_gatk4/main.nf +++ b/subworkflows/local/cram_call_gatk4/main.nf @@ -82,8 +82,7 @@ workflow CRAM_CALL_GATK4 { .set { ch_called_variants } VCF_CONCAT_BCFTOOLS( - ch_called_variants, - true + ch_called_variants ) ch_versions = ch_versions.mix(VCF_CONCAT_BCFTOOLS.out.versions) diff --git a/subworkflows/local/cram_call_gatk4/tests/main.nf.test.snap b/subworkflows/local/cram_call_gatk4/tests/main.nf.test.snap index 51d0bde2..3263ae7b 100644 --- a/subworkflows/local/cram_call_gatk4/tests/main.nf.test.snap +++ b/subworkflows/local/cram_call_gatk4/tests/main.nf.test.snap @@ -4,11 +4,15 @@ [ [ { - "id": "NA24143", - "sample": "NA24143", - "family": "Ashkenazim", - "family_samples": "NA24143", - "caller": "haplotypecaller" + "groupSize": 3, + "groupTarget": { + "id": "NA24143", + "sample": "NA24143", + "family": "Ashkenazim", + "family_samples": "NA24143", + "split_count": 3, + "caller": "haplotypecaller" + } }, "variantsMD5:57a0b3ce429f38292730f965277d28d5", "g.vcf.gz.tbi" @@ -17,11 +21,15 @@ [ [ { - "id": "NA24143", - "sample": "NA24143", - "family": "Ashkenazim", - "family_samples": "NA24143", - "caller": 
"haplotypecaller" + "groupSize": 3, + "groupTarget": { + "id": "NA24143", + "sample": "NA24143", + "family": "Ashkenazim", + "family_samples": "NA24143", + "split_count": 3, + "caller": "haplotypecaller" + } }, "NA24143.haplotypecaller.bcftools_stats.txt:md5,09b4e7674e0f5b98b1e548df3002250e" ] @@ -29,20 +37,24 @@ ], "meta": { "nf-test": "0.9.1", - "nextflow": "24.10.0" + "nextflow": "24.10.3" }, - "timestamp": "2024-11-06T16:31:34.986729048" + "timestamp": "2025-01-23T17:22:36.704823732" }, "cram_call_gatk4 - dragstr": { "content": [ [ [ { - "id": "NA24143", - "sample": "NA24143", - "family": "Ashkenazim", - "family_samples": "NA24143", - "caller": "haplotypecaller" + "groupSize": 3, + "groupTarget": { + "id": "NA24143", + "sample": "NA24143", + "family": "Ashkenazim", + "family_samples": "NA24143", + "split_count": 3, + "caller": "haplotypecaller" + } }, "variantsMD5:69601e4deb53c65d30fff9d260e31bb9", "g.vcf.gz.tbi" @@ -51,11 +63,15 @@ [ [ { - "id": "NA24143", - "sample": "NA24143", - "family": "Ashkenazim", - "family_samples": "NA24143", - "caller": "haplotypecaller" + "groupSize": 3, + "groupTarget": { + "id": "NA24143", + "sample": "NA24143", + "family": "Ashkenazim", + "family_samples": "NA24143", + "split_count": 3, + "caller": "haplotypecaller" + } }, "NA24143.haplotypecaller.bcftools_stats.txt:md5,c4dad5b8e05871dda66df42b1f6c89ff" ] @@ -63,8 +79,8 @@ ], "meta": { "nf-test": "0.9.1", - "nextflow": "24.10.0" + "nextflow": "24.10.3" }, - "timestamp": "2024-11-06T16:32:34.211560941" + "timestamp": "2025-01-23T17:23:11.945556759" } } \ No newline at end of file diff --git a/subworkflows/local/cram_prepare_samtools_bedtools/main.nf b/subworkflows/local/cram_prepare_samtools_bedtools/main.nf index 5379141d..97f0ca76 100644 --- a/subworkflows/local/cram_prepare_samtools_bedtools/main.nf +++ b/subworkflows/local/cram_prepare_samtools_bedtools/main.nf @@ -4,14 +4,11 @@ include { MERGE_BEDS as MERGE_ROI_PARAMS } from '../../../modules/local/merge_beds' include { 
MERGE_BEDS as MERGE_ROI_SAMPLE } from '../../../modules/local/merge_beds' -include { FILTER_BEDS } from '../../../modules/local/filter_beds/main' +include { PROCESS_BEDS } from '../../../modules/local/process_beds' include { SAMTOOLS_MERGE } from '../../../modules/nf-core/samtools/merge/main' include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index/main' include { SAMTOOLS_CONVERT } from '../../../modules/nf-core/samtools/convert/main' -include { TABIX_TABIX } from '../../../modules/nf-core/tabix/tabix/main' -include { TABIX_BGZIP as UNZIP_ROI } from '../../../modules/nf-core/tabix/bgzip/main' -include { BEDTOOLS_INTERSECT } from '../../../modules/nf-core/bedtools/intersect/main' include { MOSDEPTH } from '../../../modules/nf-core/mosdepth/main' workflow CRAM_PREPARE_SAMTOOLS_BEDTOOLS { @@ -154,33 +151,16 @@ workflow CRAM_PREPARE_SAMTOOLS_BEDTOOLS { def ch_perbase_beds = MOSDEPTH.out.per_base_bed .join(MOSDEPTH.out.per_base_csi, failOnMismatch: true, failOnDuplicate:true) - def ch_beds_to_filter = ch_ready_rois - .join(MOSDEPTH.out.quantized_bed, failOnDuplicate:true, failOnMismatch:true) + def ch_beds_to_process = MOSDEPTH.out.quantized_bed + .join(ch_ready_rois, failOnDuplicate:true, failOnMismatch:true) // Filter out the regions with no coverage - FILTER_BEDS( - ch_beds_to_filter.map { meta, _roi, callable -> [ meta, callable ]} - ) - ch_versions = ch_versions.mix(FILTER_BEDS.out.versions) - - def ch_beds_to_intersect = FILTER_BEDS.out.bed - .join(ch_beds_to_filter, failOnDuplicate:true, failOnMismatch:true) - .branch { meta, filtered_callable, roi, _callable -> - roi: roi - return [ meta, roi, filtered_callable ] - no_roi: !roi - return [ meta, filtered_callable ] - } - - // Intersect the ROI with the callable regions - BEDTOOLS_INTERSECT( - ch_beds_to_intersect.roi, - ch_fai + PROCESS_BEDS( + ch_beds_to_process ) - ch_versions = ch_versions.mix(BEDTOOLS_INTERSECT.out.versions) + ch_versions = ch_versions.mix(PROCESS_BEDS.out.versions) - def 
ch_ready_beds = ch_beds_to_intersect.no_roi - .mix(BEDTOOLS_INTERSECT.out.intersect) + def ch_ready_beds = PROCESS_BEDS.out.bed emit: ready_crams = ch_ready_crams // [ val(meta), path(cram), path(crai) ] diff --git a/subworkflows/local/cram_prepare_samtools_bedtools/tests/main.nf.test.snap b/subworkflows/local/cram_prepare_samtools_bedtools/tests/main.nf.test.snap index 5849a29e..01b19356 100644 --- a/subworkflows/local/cram_prepare_samtools_bedtools/tests/main.nf.test.snap +++ b/subworkflows/local/cram_prepare_samtools_bedtools/tests/main.nf.test.snap @@ -36,7 +36,7 @@ "family_samples": "NA24143", "duplicate_count": 1 }, - "NA24143.intersect.bed:md5,b87069698afefb15282d069e56110046" + "NA24143.filter.bed:md5,b87069698afefb15282d069e56110046" ] ], [ @@ -60,7 +60,7 @@ "nf-test": "0.9.1", "nextflow": "24.10.3" }, - "timestamp": "2025-01-22T13:39:12.575142149" + "timestamp": "2025-01-23T16:50:04.778372478" }, "cram_prepare_samtools_bedtools - default - WES": { "content": [ @@ -89,7 +89,7 @@ "family_samples": "NA24143", "duplicate_count": 1 }, - "NA24143.intersect.bed:md5,b87069698afefb15282d069e56110046" + "NA24143.filter.bed:md5,b87069698afefb15282d069e56110046" ] ], [ @@ -113,7 +113,7 @@ "nf-test": "0.9.1", "nextflow": "24.10.3" }, - "timestamp": "2025-01-22T13:39:30.958080052" + "timestamp": "2025-01-23T16:50:16.71563132" }, "cram_prepare_samtools_bedtools - default - WGS": { "content": [ @@ -205,7 +205,7 @@ "family_samples": "NA24143", "duplicate_count": 2 }, - "NA24143.intersect.bed:md5,b87069698afefb15282d069e56110046" + "NA24143.filter.bed:md5,b87069698afefb15282d069e56110046" ] ], [ @@ -229,6 +229,6 @@ "nf-test": "0.9.1", "nextflow": "24.10.3" }, - "timestamp": "2025-01-22T13:40:12.40700375" + "timestamp": "2025-01-23T16:50:46.654809599" } } \ No newline at end of file diff --git a/subworkflows/local/gvcf_joint_genotype_gatk4/main.nf b/subworkflows/local/gvcf_joint_genotype_gatk4/main.nf index c9b4b7e2..0f0ccbeb 100644 --- 
a/subworkflows/local/gvcf_joint_genotype_gatk4/main.nf +++ b/subworkflows/local/gvcf_joint_genotype_gatk4/main.nf @@ -130,8 +130,7 @@ workflow GVCF_JOINT_GENOTYPE_GATK4 { // VCF_CONCAT_BCFTOOLS( - ch_gather_inputs, - true + ch_gather_inputs ) ch_versions = ch_versions.mix(VCF_CONCAT_BCFTOOLS.out.versions) diff --git a/subworkflows/local/gvcf_joint_genotype_gatk4/tests/main.nf.test.snap b/subworkflows/local/gvcf_joint_genotype_gatk4/tests/main.nf.test.snap index c35e117a..84c9d2a3 100644 --- a/subworkflows/local/gvcf_joint_genotype_gatk4/tests/main.nf.test.snap +++ b/subworkflows/local/gvcf_joint_genotype_gatk4/tests/main.nf.test.snap @@ -4,20 +4,28 @@ [ [ { - "family": "Ashkenazim", - "family_samples": "NA24143,NA24149", - "caller": "haplotypecaller", - "id": "Ashkenazim" + "groupSize": 1, + "groupTarget": { + "family": "Ashkenazim", + "family_samples": "NA24143,NA24149", + "caller": "haplotypecaller", + "id": "Ashkenazim", + "split_count": 1 + } }, "variantsMD5:4dea305eb71decb122709e75af9c833f", "vcf.gz.tbi" ], [ { - "family": "NA24385", - "family_samples": "NA24385", - "caller": "haplotypecaller", - "id": "NA24385" + "groupSize": 2, + "groupTarget": { + "family": "NA24385", + "family_samples": "NA24385", + "caller": "haplotypecaller", + "id": "NA24385", + "split_count": 2 + } }, "variantsMD5:4ffd515511f59e3561e3fb1b046d7675", "vcf.gz.tbi" @@ -26,19 +34,23 @@ ], "meta": { "nf-test": "0.9.1", - "nextflow": "24.10.0" + "nextflow": "24.10.3" }, - "timestamp": "2024-11-06T16:40:54.696361238" + "timestamp": "2025-01-23T17:30:36.436180908" }, "gvcf_joint_genotype_gatk4 - single_sample": { "content": [ [ [ { - "family": "Ashkenazim", - "family_samples": "NA24143", - "caller": "haplotypecaller", - "id": "Ashkenazim" + "groupSize": 1, + "groupTarget": { + "family": "Ashkenazim", + "family_samples": "NA24143", + "caller": "haplotypecaller", + "id": "Ashkenazim", + "split_count": 1 + } }, "variantsMD5:4c6db9171912bcbbaefeec2a24968a", "vcf.gz.tbi" @@ -47,9 +59,9 @@ ], "meta": 
{ "nf-test": "0.9.1", - "nextflow": "24.10.0" + "nextflow": "24.10.3" }, - "timestamp": "2024-11-06T16:38:28.514998644" + "timestamp": "2025-01-23T17:29:22.682758973" }, "gvcf_joint_genotype_gatk4 - only_merge": { "content": [ @@ -68,10 +80,14 @@ [ [ { - "family": "Ashkenazim", - "family_samples": "NA24143,NA24149", - "caller": "haplotypecaller", - "id": "Ashkenazim" + "groupSize": 1, + "groupTarget": { + "family": "Ashkenazim", + "family_samples": "NA24143,NA24149", + "caller": "haplotypecaller", + "id": "Ashkenazim", + "split_count": 1 + } }, "variantsMD5:4dea305eb71decb122709e75af9c833f", "vcf.gz.tbi" @@ -80,8 +96,8 @@ ], "meta": { "nf-test": "0.9.1", - "nextflow": "24.10.0" + "nextflow": "24.10.3" }, - "timestamp": "2024-11-06T16:39:15.421025343" + "timestamp": "2025-01-23T17:29:47.754017104" } } \ No newline at end of file diff --git a/subworkflows/local/vcf_annotate_ensemblvep/main.nf b/subworkflows/local/vcf_annotate_ensemblvep/main.nf index abb04d99..f37b5ac5 100644 --- a/subworkflows/local/vcf_annotate_ensemblvep/main.nf +++ b/subworkflows/local/vcf_annotate_ensemblvep/main.nf @@ -3,11 +3,10 @@ // include { ENSEMBLVEP_VEP } from '../../../modules/nf-core/ensemblvep/vep/main' -include { TABIX_TABIX } from '../../../modules/nf-core/tabix/tabix/main' -include { TABIX_BGZIP } from '../../../modules/nf-core/tabix/bgzip/main' include { BCFTOOLS_PLUGINSCATTER } from '../../../modules/nf-core/bcftools/pluginscatter/main' include { BCFTOOLS_CONCAT } from '../../../modules/nf-core/bcftools/concat/main' include { BCFTOOLS_SORT } from '../../../modules/nf-core/bcftools/sort/main' +include { ENSEMBLVEP_DOWNLOAD } from '../../../modules/nf-core/ensemblvep/download/main.nf' workflow VCF_ANNOTATE_ENSEMBLVEP { take: @@ -97,6 +96,7 @@ workflow VCF_ANNOTATE_ENSEMBLVEP { ch_versions = ch_versions.mix(ENSEMBLVEP_VEP.out.versions.first()) def ch_vep_output = ENSEMBLVEP_VEP.out.vcf + .join(ENSEMBLVEP_VEP.out.tbi, failOnDuplicate:true, failOnMismatch:true) def ch_vep_reports = 
ENSEMBLVEP_VEP.out.report // Gather the files back together if they were scattered @@ -108,13 +108,13 @@ workflow VCF_ANNOTATE_ENSEMBLVEP { def ch_concat_input = ch_vep_output .join(ch_scatter.count, failOnDuplicate:true, failOnMismatch:true) - .map { meta, vcf, id, count -> + .map { meta, vcf, tbi, id, count -> def new_meta = meta + [id:id] - [ groupKey(new_meta, count), vcf ] + [ groupKey(new_meta, count), vcf, tbi ] } .groupTuple() // Group the VCFs which need to be concatenated - .map { meta, vcf -> - [ meta, vcf, [] ] + .map { meta, vcfs, tbis -> + [ meta, vcfs, tbis ] } BCFTOOLS_CONCAT( @@ -132,33 +132,13 @@ workflow VCF_ANNOTATE_ENSEMBLVEP { ch_versions = ch_versions.mix(BCFTOOLS_SORT.out.versions.first()) ch_ready_vcfs = BCFTOOLS_SORT.out.vcf + .join(BCFTOOLS_SORT.out.tbi, failOnDuplicate:true, failOnMismatch:true) } else { ch_ready_vcfs = ch_vep_output } - // - // Index the resulting bgzipped VCFs - // - - def ch_tabix_input = ch_ready_vcfs - .branch { meta, vcf -> - // Split the bgzipped VCFs from the unzipped VCFs (only bgzipped VCFs should be indexed) - bgzip: vcf.extension == "gz" - unzip: true - return [ meta, vcf, [] ] - } - - TABIX_TABIX( - ch_tabix_input.bgzip - ) - ch_versions = ch_versions.mix(TABIX_TABIX.out.versions) - - def ch_vcf_tbi = ch_tabix_input.bgzip - .join(TABIX_TABIX.out.tbi, failOnDuplicate: true, failOnMismatch: true) - .mix(ch_tabix_input.unzip) - emit: - vcf_tbi = ch_vcf_tbi // channel: [ val(meta), path(vcf), path(tbi) ] + vcf_tbi = ch_ready_vcfs // channel: [ val(meta), path(vcf), path(tbi) ] vep_reports = ch_vep_reports // channel: [ path(html) ] versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/vcf_annotate_ensemblvep/tests/main.nf.test.snap b/subworkflows/local/vcf_annotate_ensemblvep/tests/main.nf.test.snap index 0d73abc0..1042317c 100644 --- a/subworkflows/local/vcf_annotate_ensemblvep/tests/main.nf.test.snap +++ b/subworkflows/local/vcf_annotate_ensemblvep/tests/main.nf.test.snap @@ 
-10,8 +10,8 @@ "single_end": false } }, - "custom_test.vcf.gz,variantsMD5:44ed24c4dc4223670a78ffea3c7459e", - "custom_test.vcf.gz.tbi" + "custom_test.sorted.vcf.gz,variantsMD5:44ed24c4dc4223670a78ffea3c7459e", + "custom_test.sorted.vcf.gz.tbi" ] ], [ @@ -21,15 +21,14 @@ "versions.yml", "versions.yml", "versions.yml", - "versions.yml", "versions.yml" ] ], "meta": { "nf-test": "0.9.1", - "nextflow": "24.10.0" + "nextflow": "24.10.3" }, - "timestamp": "2024-11-20T14:31:50.569767686" + "timestamp": "2025-01-23T16:59:25.998935509" }, "sarscov2 - ensemblvep - no scatter": { "content": [ @@ -47,15 +46,14 @@ "custom_test.vep.vcf.gz_summary.html" ], [ - "versions.yml", "versions.yml" ] ], "meta": { "nf-test": "0.9.1", - "nextflow": "24.10.0" + "nextflow": "24.10.3" }, - "timestamp": "2024-11-20T14:32:06.437006334" + "timestamp": "2025-01-23T16:56:52.901402484" }, "sarscov2 - ensemblvep": { "content": [ @@ -68,8 +66,8 @@ "single_end": false } }, - "custom_test.vcf.gz,variantsMD5:44ed24c4dc4223670a78ffea3c7459e", - "custom_test.vcf.gz.tbi" + "custom_test.sorted.vcf.gz,variantsMD5:44ed24c4dc4223670a78ffea3c7459e", + "custom_test.sorted.vcf.gz.tbi" ] ], [ @@ -80,14 +78,13 @@ "versions.yml", "versions.yml", "versions.yml", - "versions.yml", "versions.yml" ] ], "meta": { "nf-test": "0.9.1", - "nextflow": "24.10.0" + "nextflow": "24.10.3" }, - "timestamp": "2024-11-20T14:31:32.997494596" + "timestamp": "2025-01-23T16:59:09.563813963" } } \ No newline at end of file diff --git a/subworkflows/local/vcf_annotation/main.nf b/subworkflows/local/vcf_annotation/main.nf index df3b1925..59894ef0 100644 --- a/subworkflows/local/vcf_annotation/main.nf +++ b/subworkflows/local/vcf_annotation/main.nf @@ -2,17 +2,13 @@ // ANNOTATION // -include { ENSEMBLVEP_VEP } from '../../../modules/nf-core/ensemblvep/vep/main' include { VCFANNO } from '../../../modules/nf-core/vcfanno/main' -include { TABIX_BGZIP as BGZIP_ANNOTATED_VCFS } from '../../../modules/nf-core/tabix/bgzip/main' -include { 
TABIX_TABIX as TABIX_ENSEMBLVEP } from '../../../modules/nf-core/tabix/tabix/main' -include { BCFTOOLS_CONCAT } from '../../../modules/nf-core/bcftools/concat/main' include { VCF_ANNOTATE_ENSEMBLVEP } from '../../../subworkflows/local/vcf_annotate_ensemblvep/main' workflow VCF_ANNOTATION { take: - ch_vcfs // channel: [mandatory] [ val(meta), path(vcf) ] => The post-processed VCFs + ch_vcfs // channel: [mandatory] [ val(meta), path(vcf), path(tbi) ] => The post-processed VCFs ch_fasta // channel: [mandatory] [ val(meta2), path(fasta) ] => fasta reference ch_vep_cache // channel: [optional] [ path(vep_cache) ] => The VEP cache to use ch_vep_extra_files // channel: [optional] [ path(file_1, file_2, file_3, ...) ] => All files necessary for using the desired plugins @@ -31,21 +27,7 @@ workflow VCF_ANNOTATION { def ch_reports = Channel.empty() def ch_versions = Channel.empty() - def ch_tabix_input = ch_vcfs - .branch { meta, vcf, tbi=[] -> - tbi: tbi - no_tbi: !tbi - return [ meta, vcf ] - } - - TABIX_ENSEMBLVEP( - ch_tabix_input.no_tbi - ) - ch_versions = ch_versions.mix(TABIX_ENSEMBLVEP.out.versions.first()) - - def ch_vep_input = ch_tabix_input.no_tbi - .join(TABIX_ENSEMBLVEP.out.tbi, failOnDuplicate:true, failOnMismatch:true) - .mix(ch_tabix_input.tbi) + def ch_vep_input = ch_vcfs .map { meta, vcf, tbi -> [ meta, vcf, tbi, [] ] } @@ -88,20 +70,14 @@ workflow VCF_ANNOTATION { ) ch_versions = ch_versions.mix(VCFANNO.out.versions.first()) - BGZIP_ANNOTATED_VCFS( - VCFANNO.out.vcf - ) - ch_versions = ch_versions.mix(BGZIP_ANNOTATED_VCFS.out.versions.first()) - - ch_annotated_vcfs = BGZIP_ANNOTATED_VCFS.out.output + ch_annotated_vcfs = VCFANNO.out.vcf.join(VCFANNO.out.tbi, failOnDuplicate:true, failOnMismatch:true) } else { ch_annotated_vcfs = VCF_ANNOTATE_ENSEMBLVEP.out.vcf_tbi - .map { meta, vcf, _tbi -> [ meta, vcf ]} } emit: - annotated_vcfs = ch_annotated_vcfs // [ val(meta), path(vcf) ] + annotated_vcfs = ch_annotated_vcfs // [ val(meta), path(vcf), path(tbi) ] 
reports = ch_reports // [ path(reports) ] versions = ch_versions // [ path(versions) ] } diff --git a/subworkflows/local/vcf_annotation/tests/main.nf.test b/subworkflows/local/vcf_annotation/tests/main.nf.test index 48757069..22ded8c2 100644 --- a/subworkflows/local/vcf_annotation/tests/main.nf.test +++ b/subworkflows/local/vcf_annotation/tests/main.nf.test @@ -20,7 +20,8 @@ nextflow_workflow { """ input[0] = Channel.of([ [id:"NA24143", family:"NA24143", family_samples:"NA24143", caller:"haplotypecaller"], - file(params.vcf1, checkIfExists:true) + file(params.vcf1, checkIfExists:true), + file(params.tbi1, checkIfExists:true) ]) input[1] = Channel.value([ [id:"fasta"], @@ -63,7 +64,8 @@ nextflow_workflow { """ input[0] = Channel.of([ [id:"NA24143", family:"NA24143", family_samples:"NA24143", caller:"haplotypecaller"], - file(params.vcf1, checkIfExists:true) + file(params.vcf1, checkIfExists:true), + file(params.tbi1, checkIfExists:true) ]) input[1] = Channel.value([ [id:"fasta"], diff --git a/subworkflows/local/vcf_concat_bcftools/main.nf b/subworkflows/local/vcf_concat_bcftools/main.nf index d2f3b186..3125e9bb 100644 --- a/subworkflows/local/vcf_concat_bcftools/main.nf +++ b/subworkflows/local/vcf_concat_bcftools/main.nf @@ -3,12 +3,10 @@ // include { BCFTOOLS_CONCAT } from '../../../modules/nf-core/bcftools/concat/main' -include { TABIX_TABIX } from '../../../modules/nf-core/tabix/tabix/main' workflow VCF_CONCAT_BCFTOOLS { take: ch_vcfs // channel: [ val(meta), path(vcf), path(tbi) ] - val_tabix // boolean: whether to create a index or not main: @@ -29,29 +27,8 @@ workflow VCF_CONCAT_BCFTOOLS { ch_concat_input ) ch_versions = ch_versions.mix(BCFTOOLS_CONCAT.out.versions.first()) - - def ch_vcf_tbi = Channel.empty() - if(val_tabix) { - TABIX_TABIX( - BCFTOOLS_CONCAT.out.vcf - ) - ch_versions = ch_versions.mix(TABIX_TABIX.out.versions.first()) - - ch_vcf_tbi = BCFTOOLS_CONCAT.out.vcf - .join(TABIX_TABIX.out.tbi, failOnDuplicate: true, failOnMismatch: true) - .map { 
meta, vcf, tbi -> - // Remove the bed counter from the meta field - def new_meta = meta - meta.subMap("split_count") - [ new_meta, vcf, tbi ] - } - } else { - ch_vcf_tbi = BCFTOOLS_CONCAT.out.vcf - .map { meta, vcf -> - // Remove the bed counter from the meta field - def new_meta = meta - meta.subMap("split_count") - [ new_meta, vcf ] - } - } + def ch_vcf_tbi = BCFTOOLS_CONCAT.out.vcf + .join(BCFTOOLS_CONCAT.out.tbi, failOnDuplicate:true, failOnMismatch:true) emit: vcfs = ch_vcf_tbi // channel: [ val(meta), path(vcf), path(tbi) ] diff --git a/subworkflows/local/vcf_concat_bcftools/tests/main.nf.test b/subworkflows/local/vcf_concat_bcftools/tests/main.nf.test index deb66854..ac3dd7f2 100644 --- a/subworkflows/local/vcf_concat_bcftools/tests/main.nf.test +++ b/subworkflows/local/vcf_concat_bcftools/tests/main.nf.test @@ -8,7 +8,7 @@ nextflow_workflow { tag "subworkflows_local" tag "vcf_concat_bcftools" - test("vcf_concat_bcftools - no_tabix") { + test("vcf_concat_bcftools") { config "./nextflow.config" @@ -24,7 +24,6 @@ nextflow_workflow { file(params.vcf1, checkIfExists:true), file(params.tbi1, checkIfExists:true) ]) - input[1] = false """ } } @@ -40,36 +39,4 @@ nextflow_workflow { } - test("vcf_concat_bcftools - tabix") { - - config "./nextflow.config" - - when { - workflow { - """ - input[0] = Channel.of([ - [id:"NA24143", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143", caller:"haplotypecaller", split_count:2], - file(params.gvcf1, checkIfExists:true), - file(params.gtbi1, checkIfExists:true) - ],[ - [id:"NA24143", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143", caller:"haplotypecaller", split_count:2], - file(params.vcf1, checkIfExists:true), - file(params.tbi1, checkIfExists:true) - ]) - input[1] = true - """ - } - } - - then { - assertAll( - { assert workflow.success }, - { assert snapshot( - workflow.out.vcfs.collect { [it[0], "variantsMD5:${path(it[1]).vcf.variantsMD5}", it[2][-10..-1]] } - ).match() } - ) - } - - } - } diff 
--git a/subworkflows/local/vcf_concat_bcftools/tests/main.nf.test.snap b/subworkflows/local/vcf_concat_bcftools/tests/main.nf.test.snap index cfbc2f82..8f2cc9dc 100644 --- a/subworkflows/local/vcf_concat_bcftools/tests/main.nf.test.snap +++ b/subworkflows/local/vcf_concat_bcftools/tests/main.nf.test.snap @@ -1,36 +1,18 @@ { - "vcf_concat_bcftools - tabix": { + "vcf_concat_bcftools": { "content": [ [ [ { - "id": "NA24143", - "sample": "NA24143", - "family": "Ashkenazim", - "family_samples": "NA24143", - "caller": "haplotypecaller" - }, - "variantsMD5:843352db8fe3f441ffa026dc72a30c35", - "vcf.gz.tbi" - ] - ] - ], - "meta": { - "nf-test": "0.9.1", - "nextflow": "24.10.0" - }, - "timestamp": "2024-11-14T13:41:32.794902067" - }, - "vcf_concat_bcftools - no_tabix": { - "content": [ - [ - [ - { - "id": "NA24143", - "sample": "NA24143", - "family": "Ashkenazim", - "family_samples": "NA24143", - "caller": "haplotypecaller" + "groupSize": 2, + "groupTarget": { + "id": "NA24143", + "sample": "NA24143", + "family": "Ashkenazim", + "family_samples": "NA24143", + "caller": "haplotypecaller", + "split_count": 2 + } }, "variantsMD5:843352db8fe3f441ffa026dc72a30c35" ] @@ -38,8 +20,8 @@ ], "meta": { "nf-test": "0.9.1", - "nextflow": "24.10.0" + "nextflow": "24.10.3" }, - "timestamp": "2024-11-14T13:42:04.623824246" + "timestamp": "2025-01-23T17:07:22.212689866" } } \ No newline at end of file diff --git a/subworkflows/local/vcf_concat_bcftools/tests/nextflow.config b/subworkflows/local/vcf_concat_bcftools/tests/nextflow.config index 654ac56e..7387ac8e 100644 --- a/subworkflows/local/vcf_concat_bcftools/tests/nextflow.config +++ b/subworkflows/local/vcf_concat_bcftools/tests/nextflow.config @@ -1,6 +1,6 @@ process { withName: "BCFTOOLS_CONCAT" { - ext.args = "--allow-overlaps --output-type z" + ext.args = "--allow-overlaps --output-type z --write-index=tbi" ext.prefix = { "${meta.id}.concat" } } } diff --git a/subworkflows/local/vcf_dbsnp_vcfanno/main.nf 
b/subworkflows/local/vcf_dbsnp_vcfanno/main.nf index 718b3067..5adec706 100644 --- a/subworkflows/local/vcf_dbsnp_vcfanno/main.nf +++ b/subworkflows/local/vcf_dbsnp_vcfanno/main.nf @@ -1,5 +1,4 @@ include { VCFANNO } from '../../../modules/nf-core/vcfanno/main' -include { TABIX_BGZIPTABIX } from '../../../modules/nf-core/tabix/bgziptabix/main' workflow VCF_DBSNP_VCFANNO { take: @@ -25,14 +24,11 @@ workflow VCF_DBSNP_VCFANNO { ) ch_versions = ch_versions.mix(VCFANNO.out.versions.first()) - TABIX_BGZIPTABIX( - VCFANNO.out.vcf - ) - ch_versions = ch_versions.mix(TABIX_BGZIPTABIX.out.versions.first()) + def ch_vcfs = VCFANNO.out.vcf + .join(VCFANNO.out.tbi, failOnDuplicate:true, failOnMismatch:true) emit: - vcfs = TABIX_BGZIPTABIX.out.gz_tbi // channel: [ val(meta), path(vcf), path(tbi) ] - + vcfs = ch_vcfs // channel: [ val(meta), path(vcf), path(tbi) ] versions = ch_versions // channel: [ path(versions.yml) ] } diff --git a/subworkflows/local/vcf_dbsnp_vcfanno/tests/main.nf.test b/subworkflows/local/vcf_dbsnp_vcfanno/tests/main.nf.test index 3461e963..6fc105cd 100644 --- a/subworkflows/local/vcf_dbsnp_vcfanno/tests/main.nf.test +++ b/subworkflows/local/vcf_dbsnp_vcfanno/tests/main.nf.test @@ -8,6 +8,8 @@ nextflow_workflow { tag "subworkflows_local" tag "vcf_dbsnp_vcfanno" + config "./nextflow.config" + test("vcf_dbsnp_vcfanno - default") { when { diff --git a/subworkflows/local/vcf_dbsnp_vcfanno/tests/nextflow.config b/subworkflows/local/vcf_dbsnp_vcfanno/tests/nextflow.config new file mode 100644 index 00000000..f2ce3221 --- /dev/null +++ b/subworkflows/local/vcf_dbsnp_vcfanno/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: VCFANNO { + ext.prefix = { "${meta.id}.annotated" } + } +} diff --git a/subworkflows/local/vcf_filter_bcftools/main.nf b/subworkflows/local/vcf_filter_bcftools/main.nf index cf2cdc1a..57d730fc 100644 --- a/subworkflows/local/vcf_filter_bcftools/main.nf +++ b/subworkflows/local/vcf_filter_bcftools/main.nf @@ -2,42 +2,23 @@ // Filter 
the VCFs // -include { BCFTOOLS_FILTER as FILTER_1 } from '../../../modules/nf-core/bcftools/filter/main' -include { BCFTOOLS_FILTER as FILTER_2 } from '../../../modules/nf-core/bcftools/filter/main' -include { TABIX_TABIX } from '../../../modules/nf-core/tabix/tabix/main' +include { BCFTOOLS_FILTER } from '../../../modules/local/bcftools/filter/main' workflow VCF_FILTER_BCFTOOLS { take: ch_vcfs // channel: [ val(meta), path(vcf), path(tbi) ] - val_tabix // boolean: whether to create a index or not main: def ch_versions = Channel.empty() - FILTER_1( - ch_vcfs.map { meta, vcf, tbi=[] -> [ meta, vcf, tbi ]} + BCFTOOLS_FILTER( + ch_vcfs ) - ch_versions = ch_versions.mix(FILTER_1.out.versions.first()) - - FILTER_2( - FILTER_1.out.vcf.map { meta, vcf -> [ meta, vcf, [] ]} - ) - ch_versions = ch_versions.mix(FILTER_2.out.versions.first()) - - def ch_filter_vcfs = Channel.empty() - if(val_tabix) { - TABIX_TABIX( - FILTER_2.out.vcf - ) - ch_versions = ch_versions.mix(TABIX_TABIX.out.versions.first()) - - ch_filter_vcfs = FILTER_2.out.vcf - .join(TABIX_TABIX.out.tbi, failOnDuplicate: true, failOnMismatch: true) - } else { - ch_filter_vcfs = FILTER_2.out.vcf - } + ch_versions = ch_versions.mix(BCFTOOLS_FILTER.out.versions.first()) + def ch_filter_vcfs = BCFTOOLS_FILTER.out.vcf + .join(BCFTOOLS_FILTER.out.tbi, failOnDuplicate:true, failOnMismatch:true) emit: vcfs = ch_filter_vcfs // channel: [ val(meta), path(vcf), path(tbi) ] diff --git a/subworkflows/local/vcf_filter_bcftools/tests/main.nf.test b/subworkflows/local/vcf_filter_bcftools/tests/main.nf.test index 317c60bf..eb3c9414 100644 --- a/subworkflows/local/vcf_filter_bcftools/tests/main.nf.test +++ b/subworkflows/local/vcf_filter_bcftools/tests/main.nf.test @@ -8,7 +8,7 @@ nextflow_workflow { tag "subworkflows_local" tag "vcf_filter_bcftools" - test("vcf_filter_bcftools - no_tabix") { + test("vcf_filter_bcftools") { config "./nextflow.config" @@ -20,7 +20,6 @@ nextflow_workflow { file(params.vcf1, checkIfExists:true), 
file(params.tbi1, checkIfExists:true) ]) - input[1] = false """ } } @@ -36,32 +35,4 @@ nextflow_workflow { } - test("vcf_filter_bcftools - tabix") { - - config "./nextflow.config" - - when { - workflow { - """ - input[0] = Channel.of([ - [id:"NA24143", sample:"NA24143", family:"Ashkenazim", family_samples:"NA24143", caller:"haplotypecaller"], - file(params.vcf1, checkIfExists:true), - file(params.tbi1, checkIfExists:true) - ]) - input[1] = true - """ - } - } - - then { - assertAll( - { assert workflow.success }, - { assert snapshot( - workflow.out.vcfs.collect { [it[0], "variantsMD5:${path(it[1]).vcf.variantsMD5}", it[2][-10..-1]] } - ).match() } - ) - } - - } - } diff --git a/subworkflows/local/vcf_filter_bcftools/tests/main.nf.test.snap b/subworkflows/local/vcf_filter_bcftools/tests/main.nf.test.snap index 9bc09fa6..974d84d2 100644 --- a/subworkflows/local/vcf_filter_bcftools/tests/main.nf.test.snap +++ b/subworkflows/local/vcf_filter_bcftools/tests/main.nf.test.snap @@ -1,27 +1,5 @@ { - "vcf_filter_bcftools - tabix": { - "content": [ - [ - [ - { - "id": "NA24143", - "sample": "NA24143", - "family": "Ashkenazim", - "family_samples": "NA24143", - "caller": "haplotypecaller" - }, - "variantsMD5:2ce8bc96a9b3afbf060cdd89e74c4c82", - "vcf.gz.tbi" - ] - ] - ], - "meta": { - "nf-test": "0.9.1", - "nextflow": "24.10.0" - }, - "timestamp": "2024-11-14T13:46:42.107550656" - }, - "vcf_filter_bcftools - no_tabix": { + "vcf_filter_bcftools": { "content": [ [ [ @@ -38,8 +16,8 @@ ], "meta": { "nf-test": "0.9.1", - "nextflow": "24.10.0" + "nextflow": "24.10.3" }, - "timestamp": "2024-11-14T13:46:30.33756839" + "timestamp": "2025-01-23T17:11:49.839538282" } } \ No newline at end of file diff --git a/subworkflows/local/vcf_ped_rtgtools/main.nf b/subworkflows/local/vcf_ped_rtgtools/main.nf index f4eba7b7..46709552 100644 --- a/subworkflows/local/vcf_ped_rtgtools/main.nf +++ b/subworkflows/local/vcf_ped_rtgtools/main.nf @@ -29,8 +29,8 @@ workflow VCF_PED_RTGTOOLS { def 
ch_annotate_input = ch_vcfs .join(RTGTOOLS_PEDFILTER.out.output, failOnDuplicate:true, failOnMismatch:true) - .map { meta, vcf, _tbi, ped_vcf -> - [ meta, vcf, [], [], [], ped_vcf ] + .map { meta, vcf, tbi, ped_vcf -> + [ meta, vcf, tbi, [], [], ped_vcf ] } BCFTOOLS_ANNOTATE( @@ -38,7 +38,10 @@ workflow VCF_PED_RTGTOOLS { ) ch_versions = ch_versions.mix(BCFTOOLS_ANNOTATE.out.versions.first()) + def ch_ped_vcfs = BCFTOOLS_ANNOTATE.out.vcf + .join(BCFTOOLS_ANNOTATE.out.tbi, failOnDuplicate:true, failOnMismatch:true) + emit: - ped_vcfs = BCFTOOLS_ANNOTATE.out.vcf // [ val(meta), path(vcf) ] - versions = ch_versions // [ path(versions) ] + ped_vcfs = ch_ped_vcfs // [ val(meta), path(vcf), path(tbi) ] + versions = ch_versions // [ path(versions) ] } diff --git a/workflows/germline.nf b/workflows/germline.nf index 5508df4f..6ecf2500 100644 --- a/workflows/germline.nf +++ b/workflows/germline.nf @@ -44,12 +44,9 @@ include { UNTAR } from '../ include { ENSEMBLVEP_DOWNLOAD } from '../modules/nf-core/ensemblvep/download/main' include { BCFTOOLS_STATS } from '../modules/nf-core/bcftools/stats/main' include { BCFTOOLS_NORM } from '../modules/nf-core/bcftools/norm/main' -include { TABIX_TABIX as TABIX_DECOMPOSE } from '../modules/nf-core/tabix/tabix/main' -include { TABIX_TABIX as TABIX_NORMALIZE } from '../modules/nf-core/tabix/tabix/main' include { TABIX_TABIX as TABIX_DBSNP } from '../modules/nf-core/tabix/tabix/main' include { TABIX_TABIX as TABIX_GVCF } from '../modules/nf-core/tabix/tabix/main' include { TABIX_TABIX as TABIX_TRUTH } from '../modules/nf-core/tabix/tabix/main' -include { TABIX_TABIX as TABIX_FINAL } from '../modules/nf-core/tabix/tabix/main' include { BCFTOOLS_STATS as BCFTOOLS_STATS_FAMILY } from '../modules/nf-core/bcftools/stats/main' include { VCF2DB } from '../modules/nf-core/vcf2db/main' include { MULTIQC } from '../modules/nf-core/multiqc/main' @@ -558,8 +555,7 @@ workflow GERMLINE { def ch_filtered_variants = Channel.empty() if(filter) { 
VCF_FILTER_BCFTOOLS( - ch_called_variants, - true + ch_called_variants ) ch_versions = ch_versions.mix(VCF_FILTER_BCFTOOLS.out.versions) ch_filtered_variants = VCF_FILTER_BCFTOOLS.out.vcfs @@ -575,13 +571,8 @@ workflow GERMLINE { ) ch_versions = ch_versions.mix(BCFTOOLS_NORM.out.versions.first()) - TABIX_NORMALIZE( - BCFTOOLS_NORM.out.vcf - ) - ch_versions = ch_versions.mix(TABIX_NORMALIZE.out.versions.first()) - ch_normalized_variants = BCFTOOLS_NORM.out.vcf - .join(TABIX_NORMALIZE.out.tbi, failOnDuplicate:true, failOnMismatch:true) + .join(BCFTOOLS_NORM.out.tbi, failOnDuplicate:true, failOnMismatch:true) } else { ch_normalized_variants = ch_filtered_variants } @@ -628,16 +619,12 @@ workflow GERMLINE { ch_ped_vcfs = VCF_PED_RTGTOOLS.out.ped_vcfs } else { ch_ped_vcfs = ch_normalized_variants - .map { meta, vcf, _tbi=[] -> - [ meta, vcf ] - } } // // Annotation of the variants and creation of Gemini-compatible database files // - def ch_annotation_output = Channel.empty() if (annotate) { VCF_ANNOTATION( ch_ped_vcfs, @@ -656,23 +643,11 @@ workflow GERMLINE { ch_versions = ch_versions.mix(VCF_ANNOTATION.out.versions) ch_reports = ch_reports.mix(VCF_ANNOTATION.out.reports) - ch_annotation_output = VCF_ANNOTATION.out.annotated_vcfs + ch_final_vcfs = VCF_ANNOTATION.out.annotated_vcfs } else { - ch_annotation_output = ch_ped_vcfs + ch_final_vcfs = ch_ped_vcfs } - // - // Tabix the resulting VCF - // - - TABIX_FINAL( - ch_annotation_output - ) - ch_versions = ch_versions.mix(TABIX_FINAL.out.versions.first()) - - ch_final_vcfs = ch_annotation_output - .join(TABIX_FINAL.out.tbi, failOnDuplicate:true, failOnMismatch:true) - // // Validate the found variants //