cancerit · vaofford · Sep 15, 2023 · Aug 25, 2023 · Aug 25, 2023 · Sep 14, 2023
diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml
@@ -99,6 +99,7 @@ jobs:
           allow-repeats: false
 
   nf-core:
+    if: false
     runs-on: ubuntu-latest
     steps:
 

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -37,11 +37,11 @@ Initial release of QUANTS, created with the [nf-core](https://nf-co.re/) templat
 ## 3.0.0.0 - [21st August 2023]
 
 * Split read trimming into two stages
-	* Adapter trimming - removes user-defined adapter sequences and takes forward both trimmed and untrimmed reads
-	* Primer trimming - removes user-defined primer sequences and takes forward only trimmed reads
+    * Adapter trimming - removes user-defined adapter sequences and takes forward both trimmed and untrimmed reads
+    * Primer trimming - removes user-defined primer sequences and takes forward only trimmed reads
 * Add a read modification process which can append user-defined sequences to trimmed reads
 * Add library transformer to allow users to provide libraries in a different format (e.g. the meta CSV from VaLiAnT) and convert it for use with pyQUEST
 
 ## 3.0.0.1 - [12th September 2023]
 
-* Primer trimming - bugfix to ensure cutadapt splits reads into trimmed and untrimmed files
+* Primer trimming - bugfix to ensure cutadapt splits reads into trimmed and untrimmed files
diff --git a/functions/functions.nf b/functions/functions.nf
@@ -0,0 +1,45 @@
+//
+// Mixes the seqkit_stats output of a SEQUENCING_QC workflow object into `channel`, after
+// combining it with the last ':'-separated component of the workflow name (the stage label).
+//
+def add_seqkit_stats(channel, workflow) {
+    // only the trailing component of a ':'-delimited workflow name is kept as the label
+    def stage = workflow.name.split(':').last()
+    def tagged_stats = workflow.out.seqkit_stats.combine([stage])
+
+    return channel.mix(tagged_stats)
+}
+
+//
+// Strips pipeline stage suffixes from the end of a sample name.
+// Handled suffixes: _raw, _primer_trimmed, _adapter_trimmed, _merged, _merged_filtered.
+// A name that ends with none of them is returned unchanged.
+//
+def trim_sample_name(sample_name) {
+    def stage_suffixes = ['_raw', '_primer_trimmed', '_adapter_trimmed', '_merged', '_merged_filtered']
+
+    // applied one after another in list order; each pattern is anchored to the end of the name
+    return stage_suffixes.inject(sample_name) { name, suffix -> name.replaceFirst(suffix + '$', '') }
+}
+
+//
+// Prepends two tab-separated columns to every line of a seqkit stats file:
+// "sample"/"stage" on the first (header) line, the trimmed sample name and `stage` on the rest.
+// Returns the modified content as a single newline-terminated string.
+//
+def modify_seqkit_stats(meta, path, stage) {
+    // TODO should be removed in the future once sample name handling in the pipeline is consistent
+    def sample_name = trim_sample_name(meta.id)
+
+    // every variable is declared with `def`: undeclared names would land in the shared script
+    // binding, where concurrent invocations of this function could overwrite each other
+    def new_lines = file(path)
+        .readLines()
+        .withIndex()
+        .collect { text, idx ->
+            def prefix = (idx == 0) ? "sample" + "\t" + "stage" : sample_name + "\t" + stage
+            return prefix + "\t" + text
+        }
+
+    return new_lines.join("\n") + "\n"
+}
diff --git a/workflows/sge.nf b/workflows/sge.nf
@@ -182,6 +182,11 @@ include { SEQUENCING_QC as RAW_SEQUENCING_QC;
 //
 include { MULTIQC } from '../modules/nf-core/multiqc/main' addParams( options: multiqc_options   )
 
+//
+// FUNCTIONS: collection of custom functions
+//
+include { add_seqkit_stats; modify_seqkit_stats } from '../functions/functions.nf'
+
 /*
 ========================================================================================
     RUN MAIN WORKFLOW
@@ -194,6 +199,7 @@ def multiqc_report = []
 workflow SGE {
     // Set up empty channels
     ch_software_versions = Channel.empty()
+    seqkit_stat_ch = Channel.empty()
 
     if (params.input_type == 'cram') {
         //
@@ -224,6 +230,7 @@ workflow SGE {
         ch_raw_read_qc = ch_raw_reads.map{it -> [[id: it[0].id + '_raw', single_end: it[0].single_end], it[1]]}
         RAW_SEQUENCING_QC ( ch_raw_read_qc )
         ch_software_versions = ch_software_versions.mix(RAW_SEQUENCING_QC.out.fastqc_version, RAW_SEQUENCING_QC.out.seqkit_version)
+        seqkit_stat_ch = add_seqkit_stats(seqkit_stat_ch, RAW_SEQUENCING_QC)
     }
 
     //
@@ -240,6 +247,7 @@ workflow SGE {
             ch_adapter_trimming_qc = ADAPTER_TRIMMING.out.reads.map{it -> [[id: it[0].id + '_adapter_trimmed', single_end: it[0].single_end], it[1]]}
             ADAPTER_TRIMMED_SEQUENCING_QC ( ch_adapter_trimming_qc )
             ch_software_versions = ch_software_versions.mix(ADAPTER_TRIMMED_SEQUENCING_QC.out.fastqc_version, ADAPTER_TRIMMED_SEQUENCING_QC.out.seqkit_version)
+            seqkit_stat_ch = add_seqkit_stats(seqkit_stat_ch, ADAPTER_TRIMMED_SEQUENCING_QC)
         }
         // Send to next stage
         ch_primer_trim = ADAPTER_TRIMMING.out.reads
@@ -261,6 +269,7 @@ workflow SGE {
             ch_primer_trimming_qc = PRIMER_TRIMMING.out.reads.map{it -> [[id: it[0].id + '_primer_trimmed', single_end: it[0].single_end], it[1]]}
             PRIMER_TRIMMED_SEQUENCING_QC ( ch_primer_trimming_qc )
             ch_software_versions = ch_software_versions.mix(PRIMER_TRIMMED_SEQUENCING_QC.out.fastqc_version, PRIMER_TRIMMED_SEQUENCING_QC.out.seqkit_version)
+            seqkit_stat_ch = add_seqkit_stats(seqkit_stat_ch, PRIMER_TRIMMED_SEQUENCING_QC)
         }
         // Send to next stage
         ch_read_merge = PRIMER_TRIMMING.out.reads
@@ -283,6 +292,7 @@ workflow SGE {
             ch_merged_read_qc = ch_read_transform
             MERGED_SEQUENCING_QC ( ch_merged_read_qc )
             ch_software_versions = ch_software_versions.mix(MERGED_SEQUENCING_QC.out.fastqc_version, MERGED_SEQUENCING_QC.out.seqkit_version)
+            seqkit_stat_ch = add_seqkit_stats(seqkit_stat_ch, MERGED_SEQUENCING_QC)
         }
     } else {
         ch_read_transform = ch_read_merge
@@ -316,6 +326,7 @@ workflow SGE {
             ch_filtered_read_qc = READ_FILTERING.out.reads.map{it -> [[id: it[0].id + '_filtered', single_end: true], it[1]]}
             FILTERED_SEQUENCING_QC ( ch_filtered_read_qc )
             ch_software_versions = ch_software_versions.mix(FILTERED_SEQUENCING_QC.out.fastqc_version, FILTERED_SEQUENCING_QC.out.seqkit_version)
+            seqkit_stat_ch = add_seqkit_stats(seqkit_stat_ch, FILTERED_SEQUENCING_QC)
         }
     } else {
         ch_reads_to_modify = ch_read_filter
@@ -361,6 +372,10 @@ workflow SGE {
         ch_software_versions.map { it }.collect()
     )
 
+    seqkit_stat_ch
+        .map { meta, file, stage -> modify_seqkit_stats(meta, file, stage) }
+        .collectFile(keepHeader: true, name: 'seqkit_stats.tsv', storeDir: "${params.outdir}/seqkit_stats")
+
     //
     // MODULE: MultiQC
     //