Merge branch 'master' into fix/cat-fastq

nf-core · Feb 25, 2025 · 21412d1 · 21412d1
2 parents e8c180b + 26b0fc1
commit 21412d1
Show file tree

Hide file tree

Showing 26 changed files with 1,491 additions and 650 deletions.
diff --git a/modules/nf-core/hmmer/hmmpress/environment.yml b/modules/nf-core/hmmer/hmmpress/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:  # conda channels declared for this module's environment
+  - conda-forge
+  - bioconda
+dependencies:
+  - bioconda::hmmer=3.4  # same HMMER version (3.4) as the container tags in this module's main.nf
diff --git a/modules/nf-core/hmmer/hmmpress/main.nf b/modules/nf-core/hmmer/hmmpress/main.nf
@@ -0,0 +1,48 @@
+process HMMER_HMMPRESS {  // runs hmmpress on one HMM file and emits the files matching *.h3? plus versions.yml
+    tag "$meta.id"
+    label 'process_single'
+
+    conda "${moduleDir}/environment.yml"  // conda env shipped next to this module; the container below carries the same hmmer 3.4
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/hmmer:3.4--hdbdd923_1' :
+        'biocontainers/hmmer:3.4--hdbdd923_1' }"
+
+    input:
+    tuple val(meta), path(hmmfile)  // meta map (its id is used for the tag) and the HMM file passed to hmmpress
+
+    output:
+    tuple val(meta), path("*.h3?"), emit: compressed_db  // glob covers the .h3m/.h3i/.h3f/.h3p names touched in the stub below
+    path "versions.yml"           , emit: versions  // written by the END_VERSIONS heredoc in both script and stub
+
+    when:
+    task.ext.when == null || task.ext.when  // task runs when ext.when is unset or evaluates truthy
+
+    script:
+    def args = task.ext.args ?: ''  // optional extra hmmpress arguments taken from ext.args; empty string when unset
+
+    """
+    hmmpress \\
+        $args \\
+        ${hmmfile}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        hmmer: \$(echo \$(hmmpress -h | grep HMMER | sed 's/# HMMER //' | sed 's/ .*//' 2>&1))
+    END_VERSIONS
+    """
+
+    stub:  // creates empty placeholder outputs; the version is still queried from the hmmpress binary
+    def prefix = task.ext.prefix ?: "stub"  // NOTE(review): the test snapshot shows real outputs named after the input (proteome.hmm.h3f) while the stub uses ext.prefix or the literal stub - confirm this difference is intended
+
+    """
+    touch ${prefix}.h3m
+    touch ${prefix}.h3i
+    touch ${prefix}.h3f
+    touch ${prefix}.h3p
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        hmmer: \$(echo \$(hmmpress -h | grep HMMER | sed 's/# HMMER //' | sed 's/ .*//' 2>&1))
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/hmmer/hmmpress/meta.yml b/modules/nf-core/hmmer/hmmpress/meta.yml
@@ -0,0 +1,47 @@
+name: "hmmer_hmmpress"
+description: compress and index profile database for hmmscan
+keywords:
+  - hidden Markov model
+  - HMM
+  - hmmer
+  - hmmpress
+  - hmmscan
+tools:
+  - "hmmer":
+      description: "Biosequence analysis using profile hidden Markov models"
+      homepage: "http://hmmer.org"
+      documentation: "http://hmmer.org/documentation.html"
+      tool_dev_url: "https://github.com/EddyRivasLab/hmmer"
+      doi: "10.1371/journal.pcbi.1002195"
+      licence: ["BSD"]  # NOTE(review): confirm the exact licence identifier against the HMMER repository
+      identifier: biotools:hmmer
+input:  # describes the single input tuple (meta, hmmfile) declared by the process in main.nf
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. [ id:'test', single_end:false ]
+    - hmmfile:
+        type: file
+        description: HMMER flatfile database of HMM profiles
+        pattern: "*"  # no filename restriction is placed on the input HMM file
+output:  # one entry per emit name declared in main.nf (compressed_db, versions)
+  - compressed_db:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - "*.h3?":
+          type: list  # the glob matches several files (.h3f/.h3i/.h3m/.h3p in the test snapshot)
+          description: Binary files with compressed profiles and their index
+          pattern: "*.h3?"
+  - versions:
+      - versions.yml:
+          type: file
+          description: File containing software versions
+          pattern: "versions.yml"
+authors:
+  - "@ochkalova"
+maintainers:
+  - "@ochkalova"
diff --git a/modules/nf-core/hmmer/hmmpress/tests/main.nf.test b/modules/nf-core/hmmer/hmmpress/tests/main.nf.test
@@ -0,0 +1,66 @@
+
+nextflow_process {  // nf-test suite for HMMER_HMMPRESS: one real run and one stub run, both checked against snapshots
+
+    name "Test Process HMMER_HMMPRESS"
+    script "../main.nf"
+    process "HMMER_HMMPRESS"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "hmmer"
+    tag "hmmer/hmmpress"
+    tag "gunzip"  // tagged because the setup block below runs the gunzip module
+
+    setup {  // decompresses the gzipped test HMM; both tests feed GUNZIP.out.gunzip into the process
+            run("GUNZIP") {
+                script "../../../gunzip"
+
+                process {
+                    """
+                    input[0] = [
+                        [ id:'test' ],
+                        file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.hmm.gz', checkIfExists: true)
+                    ]
+                    """
+                }
+            }
+        }
+
+    test("sarscov2 - proteome - hmm - gz") {  // real hmmpress run on the decompressed proteome HMM
+
+        when {
+            process {
+                """
+                input[0] = GUNZIP.out.gunzip
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }  // compares all output channels with the stored snapshot entry for this test
+            )
+        }
+    }
+
+    test("sarscov2 - proteome - hmm - gz - stub") {  // same input, run with the -stub option
+        options '-stub'  // the matching snapshot entry lists the empty stub.h3? files
+
+        when {
+            process {
+                """
+                input[0] = GUNZIP.out.gunzip
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }  // compares all output channels with the stored snapshot entry for this test
+            )
+        }
+    }
+
+}
diff --git a/modules/nf-core/hmmer/hmmpress/tests/main.nf.test.snap b/modules/nf-core/hmmer/hmmpress/tests/main.nf.test.snap
@@ -0,0 +1,88 @@
+{
+    "sarscov2 - proteome - hmm - gz": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        [
+                            "proteome.hmm.h3f:md5,d0640e710a5eec56aa95a64e6dcb9971",
+                            "proteome.hmm.h3i:md5,1e68ee61bfe47697e3df24b5551dbb82",
+                            "proteome.hmm.h3m:md5,9fa27bd2fda0e8c037852301245dcbfb",
+                            "proteome.hmm.h3p:md5,e73ab76f194340b797e8485464caa369"
+                        ]
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,f5473ee4ad53142d92c79b5a0fe94bf6"
+                ],
+                "compressed_db": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        [
+                            "proteome.hmm.h3f:md5,d0640e710a5eec56aa95a64e6dcb9971",
+                            "proteome.hmm.h3i:md5,1e68ee61bfe47697e3df24b5551dbb82",
+                            "proteome.hmm.h3m:md5,9fa27bd2fda0e8c037852301245dcbfb",
+                            "proteome.hmm.h3p:md5,e73ab76f194340b797e8485464caa369"
+                        ]
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,f5473ee4ad53142d92c79b5a0fe94bf6"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.10.4"
+        },
+        "timestamp": "2025-02-25T14:43:13.712539"
+    },
+    "sarscov2 - proteome - hmm - gz - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        [
+                            "stub.h3f:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "stub.h3i:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "stub.h3m:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "stub.h3p:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        ]
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,f5473ee4ad53142d92c79b5a0fe94bf6"
+                ],
+                "compressed_db": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        [
+                            "stub.h3f:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "stub.h3i:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "stub.h3m:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "stub.h3p:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        ]
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,f5473ee4ad53142d92c79b5a0fe94bf6"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.10.4"
+        },
+        "timestamp": "2025-02-25T14:43:19.740161"
+    }
+}
diff --git a/modules/nf-core/quilt/quilt/main.nf b/modules/nf-core/quilt/quilt/main.nf
@@ -8,9 +8,8 @@ process QUILT_QUILT {
         'biocontainers/r-quilt:1.0.5--r43h06b5641_0' }"
 
     input:
-    tuple val(meta), path(bams), path(bais), path(bamlist), path(reference_haplotype_file), path(reference_legend_file), val(chr), val(regions_start), val(regions_end), val(ngen), val(buffer), path(genetic_map_file)
-    tuple val(meta2), path(posfile), path(phasefile)
-    tuple val(meta3), path(fasta)
+    tuple val(meta), path(bams), path(bais), path(bamlist), path(samplename), path(reference_haplotype_file), path(reference_legend_file), path(posfile), path(phasefile), path(genfile), val(chr), val(regions_start), val(regions_end), val(ngen), val(buffer), path(genetic_map_file)
+    tuple val(meta2), path(fasta)
 
     output:
     tuple val(meta), path("*.vcf.gz"),              emit: vcf
@@ -23,15 +22,17 @@ process QUILT_QUILT {
     task.ext.when == null || task.ext.when
 
     script:
-    def args                        =   task.ext.args ?: ''
+    def args                        =   task.ext.args   ?: ''
     def prefix                      =   task.ext.prefix ?: "${meta.id}"
+    def suffix                      =   task.ext.suffix ?: "vcf.gz"
     def extensions                  =   bams.collect { it.extension }
     def extension                   =   extensions.flatten().unique()
     def list_command                =   extension == ["bam"]  ? "--bamlist="                       :
                                         extension == ["cram"] ? "--reference=${fasta} --cramlist=" : ""
     def genetic_map_file_command    =   genetic_map_file      ? "--genetic_map_file=${genetic_map_file}"     : ""
     def posfile_command             =   posfile               ? "--posfile=${posfile}"                       : ""
     def phasefile_command           =   phasefile             ? "--phasefile=${phasefile}"                   : ""
+    def samplename_command          =   samplename            ? "--sampleNames_file=${samplename}"           : ""
     if (!(args ==~ /.*--seed.*/)) {args += " --seed=1"}
 
     """
@@ -48,6 +49,7 @@ process QUILT_QUILT {
         $genetic_map_file_command \\
         $posfile_command \\
         $phasefile_command \\
+        $samplename_command \\
         --chr=$chr \\
         --regionStart=$regions_start \\
         --regionEnd=$regions_end \\
@@ -57,9 +59,48 @@ process QUILT_QUILT {
         --outputdir="." \\
         --reference_haplotype_file=$reference_haplotype_file \\
         --reference_legend_file=$reference_legend_file \\
+        --output_filename=${prefix}.${suffix} \\
         $args
 
 
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        r-base: \$(Rscript -e "cat(strsplit(R.version[['version.string']], ' ')[[1]][3])")
+        r-quilt: \$(Rscript -e "cat(as.character(utils::packageVersion(\\"QUILT\\")))")
+    END_VERSIONS
+    """
+
+    stub:
+    def args          = task.ext.args   ?: ''
+    def prefix        = task.ext.prefix ?: "${meta.id}"
+    def suffix        = task.ext.suffix ?: "vcf.gz"
+    def create_cmd    = suffix.endsWith(".gz") ? "echo '' | gzip >" : "touch"
+    def make_plots    = args.contains("--make_plots=TRUE")
+    def save_ref      = args.contains("--save_prepared_reference=TRUE")
+    def nGibbsSamples = args.contains("--nGibbsSamples=") ? args.split("--nGibbsSamples=")[1].split(" ")[0] : 7
+    def n_seek_its    = args.contains("--n_seek_its=")    ? args.split("--n_seek_its=")[1].split(" ")[0]    : 3
+
+    """
+    ${create_cmd} ${prefix}.${suffix}
+    touch ${prefix}.${suffix}.tbi
+    if [ "${save_ref}" == true ]
+    then
+        mkdir -p RData
+        touch "RData/QUILT_prepared_reference.${chr}.${regions_start}.${regions_end}.RData"
+    fi
+    if [ "${make_plots}" == true ]
+    then
+        mkdir -p plots
+        for nGibbs in {0..${nGibbsSamples}}
+        do
+            touch "plots/haps.${prefix}.${chr}.${regions_start}.${regions_end}_igs.\$((nGibbs+1)).0.truth.png"
+            for its in {1..${n_seek_its}}
+            do
+                touch "plots/haps.${prefix}.${chr}.${regions_start}.${regions_end}_igs.\$((nGibbs+1)).it\$its.gibbs.png"
+            done
+        done
+    fi
+
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
         r-base: \$(Rscript -e "cat(strsplit(R.version[['version.string']], ' ')[[1]][3])")