From 846ca55b0fe3e334516da39fef6dcaf2ab2b4bae Mon Sep 17 00:00:00 2001
From: bjlang <>
Date: Fri, 15 Nov 2024 13:01:11 +0100
Subject: [PATCH] Fix tests; add version channels; remove params access in
 subworkflow

---
 subworkflows/local/correlation/main.nf  |  9 +++--
 subworkflows/local/differential/main.nf | 46 +++++++++++++------------
 subworkflows/local/enrichment/main.nf   |  9 +++--
 subworkflows/local/experimental/main.nf | 13 +++++--
 workflows/differentialabundance.nf      |  4 ++-
 5 files changed, 50 insertions(+), 31 deletions(-)

diff --git a/subworkflows/local/correlation/main.nf b/subworkflows/local/correlation/main.nf
index d62f435f..bb46c5de 100644
--- a/subworkflows/local/correlation/main.nf
+++ b/subworkflows/local/correlation/main.nf
@@ -10,8 +10,9 @@ workflow CORRELATION {
     main:
 
     // initialize empty results channels
-    ch_matrix   = Channel.empty()
+    ch_matrix    = Channel.empty()
     ch_adjacency = Channel.empty()
+    ch_versions  = Channel.empty()
 
     // branch tools to select the correct correlation analysis method
     ch_counts
@@ -26,10 +27,12 @@ workflow CORRELATION {
     PROPR(ch_counts.propr.unique())
     ch_matrix    = PROPR.out.matrix.mix(ch_matrix)
     ch_adjacency = PROPR.out.adjacency.mix(ch_adjacency)
+    ch_versions  = ch_versions.mix(PROPR.out.versions)
 
     // TODO: divide propr module into cor, propr, pcor, pcorbshrink, etc.
 
     emit:
-    matrix    = ch_matrix
-    adjacency = ch_adjacency
+    matrix    = ch_matrix     // channel: [ csv ]
+    adjacency = ch_adjacency  // channel: [ csv ]
+    versions  = ch_versions   // channel: [ versions.yml ]
 }
diff --git a/subworkflows/local/differential/main.nf b/subworkflows/local/differential/main.nf
index 4271787d..ff2f0531 100644
--- a/subworkflows/local/differential/main.nf
+++ b/subworkflows/local/differential/main.nf
@@ -13,6 +13,8 @@ workflow DIFFERENTIAL {
     ch_counts             // [ meta_exp, counts ] with meta keys: method, args_diff
     ch_samplesheet        // [ meta_exp, samplesheet ]
     ch_contrasts          // [ meta_contrast, contrast_variable, reference, target ]
+    ch_transcript_lengths // [ meta_lengths, lengths ]
+    ch_control_features   // [ meta_control, control ]
 
     main:
 
@@ -22,6 +24,7 @@ workflow DIFFERENTIAL {
     ch_results_genewise          = Channel.empty()
     ch_results_genewise_filtered = Channel.empty()
     ch_adjacency                 = Channel.empty()
+    ch_versions                  = Channel.empty()
 
     // branch tools to select the correct differential analysis method
     ch_counts
@@ -56,28 +59,25 @@ workflow DIFFERENTIAL {
     ch_results_genewise          = PROPD.out.connectivity.mix(ch_results_genewise)
     ch_results_genewise_filtered = PROPD.out.hub_genes.mix(ch_results_genewise_filtered)
     ch_adjacency                 = PROPD.out.adjacency.mix(ch_adjacency)
+    ch_versions                  = PROPD.out.versions.mix(ch_versions)
 
     // ----------------------------------------------------
     // Perform differential analysis with DESeq2
     // ----------------------------------------------------
 
-    if (params.transcript_length_matrix) { ch_transcript_lengths = Channel.of([ exp_meta, file(params.transcript_length_matrix, checkIfExists: true)]).first() } else { ch_transcript_lengths = Channel.of([[],[]]) }
-    if (params.control_features) { ch_control_features = Channel.of([ exp_meta, file(params.control_features, checkIfExists: true)]).first() } else { ch_control_features = Channel.of([[],[]]) }
-
-    ch_counts
-        .join(ch_samplesheet)
+    ch_counts.deseq2
+        .combine(ch_samplesheet)
+        .filter{ meta_counts, counts, meta_samplesheet, samplesheet -> meta_counts.subMap(meta_samplesheet.keySet()) == meta_samplesheet }
         .combine(ch_contrasts)
         .combine(ch_transcript_lengths)
         .combine(ch_control_features)
-        .combine(ch_tools_single.deseq2)
         .multiMap {
-            meta_data, counts, samplesheet, meta_contrast, contrast_variable, reference, target, meta_lengths, lengths, meta_control, control, pathway, meta_tools ->
-                def meta = meta_data.clone() + meta_contrast.clone() + meta_lengths.clone() + meta_control.clone() + meta_tools.clone()
+            meta_data, counts, meta_samplesheet, samplesheet, meta_contrast, contrast_variable, reference, target, meta_lengths, lengths, meta_control, control ->
+                def meta = meta_data.clone() + meta_contrast.clone() + meta_lengths.clone() + meta_control.clone()
                 contrast: [ meta, contrast_variable, reference, target ]
                 samples_and_matrix: [ meta, samplesheet, counts ]
                 control_features:   [ meta, control ]
                 transcript_lengths: [ meta, lengths ]
-                pathway: [ meta, pathway ]
         }
         .set { ch_deseq2 }
 
@@ -96,9 +96,10 @@ workflow DIFFERENTIAL {
             ch_deseq2.transcript_lengths
         )
 
-    ch_norm_deseq2 = DESEQ2_NORM.out.normalised_counts
+    ch_norm_deseq2         = DESEQ2_NORM.out.normalised_counts
     ch_differential_deseq2 = DESEQ2_DIFFERENTIAL.out.results
-    ch_model_deseq2 = DESEQ2_DIFFERENTIAL.out.model
+    ch_model_deseq2        = DESEQ2_DIFFERENTIAL.out.model
+    ch_versions            = DESEQ2_DIFFERENTIAL.out.versions.mix(ch_versions)
 
     ch_processed_matrices = ch_norm_deseq2
     if ('rlog' in params.deseq2_vs_method){
@@ -120,11 +121,9 @@ workflow DIFFERENTIAL {
         ch_padj_deseq2
     )
 
-    ch_results_genewise = DESEQ2_DIFFERENTIAL.out.results
-                            .join(ch_deseq2.pathway).map(correct_meta_data).mix(ch_results_genewise)
-
-    ch_results_genewise_filtered = FILTER_DIFFTABLE_DESEQ2.out.filtered
-                            .join(ch_deseq2.pathway).map(correct_meta_data).mix(ch_results_genewise_filtered)
+    ch_results_genewise          = DESEQ2_DIFFERENTIAL.out.results.mix(ch_results_genewise)
+    ch_results_genewise_filtered = FILTER_DIFFTABLE_DESEQ2.out.filtered.mix(ch_results_genewise_filtered)
+    ch_versions                  = FILTER_DIFFTABLE_DESEQ2.out.versions.mix(ch_versions)
 
     // ----------------------------------------------------
     // Perform differential analysis with limma
@@ -148,6 +147,7 @@ workflow DIFFERENTIAL {
 
     // run limma
     LIMMA_DIFFERENTIAL(ch_limma.input1, ch_limma.input2)
+    ch_versions = LIMMA_DIFFERENTIAL.out.versions.mix(ch_versions)
 
     // filter results
     // note that these are column names specific for limma output table
@@ -161,13 +161,15 @@ workflow DIFFERENTIAL {
     )
 
     // collect results
-    ch_results_genewise = LIMMA_DIFFERENTIAL.out.results.mix(ch_results_genewise)
+    ch_results_genewise          = LIMMA_DIFFERENTIAL.out.results.mix(ch_results_genewise)
     ch_results_genewise_filtered = FILTER_DIFFTABLE_LIMMA.out.filtered.mix(ch_results_genewise_filtered)
+    ch_versions                  = FILTER_DIFFTABLE_LIMMA.out.versions.mix(ch_versions)
 
     emit:
-    results_pairwise          = ch_results_pairwise
-    results_pairwise_filtered = ch_results_pairwise_filtered
-    results_genewise          = ch_results_genewise
-    results_genewise_filtered = ch_results_genewise_filtered
-    adjacency                 = ch_adjacency
+    results_pairwise          = ch_results_pairwise           // channel: [ tsv ]
+    results_pairwise_filtered = ch_results_pairwise_filtered  // channel: [ tsv ]
+    results_genewise          = ch_results_genewise           // channel: [ tsv ]
+    results_genewise_filtered = ch_results_genewise_filtered  // channel: [ tsv ]
+    adjacency                 = ch_adjacency                  // channel: [ tsv ]
+    versions                  = ch_versions                   // channel: [ versions.yml ]
 }
diff --git a/subworkflows/local/enrichment/main.nf b/subworkflows/local/enrichment/main.nf
index 50736a62..668641cf 100644
--- a/subworkflows/local/enrichment/main.nf
+++ b/subworkflows/local/enrichment/main.nf
@@ -18,6 +18,7 @@ workflow ENRICHMENT {
     // initialize empty results channels
     ch_enriched = Channel.empty()
     ch_gmt      = Channel.empty()
+    ch_versions = Channel.empty()
 
     ch_adjacency
         .branch {
@@ -32,7 +33,8 @@ workflow ENRICHMENT {
     // TODO this should be optional, only run when there is no gene set data provided by user
 
     MYGENE(ch_counts.take(1))  // only one data is provided to this pipeline
-    ch_gmt = MYGENE.out.gmt
+    ch_gmt      = MYGENE.out.gmt
+    ch_versions = ch_versions.mix(MYGENE.out.versions)
 
     // ----------------------------------------------------
     // Perform enrichment analysis with GREA
@@ -40,6 +42,7 @@ workflow ENRICHMENT {
 
     GREA(ch_adjacency.grea.unique(), ch_gmt.collect())
     ch_enriched = ch_enriched.mix(GREA.out.results)
+    ch_versions = ch_versions.mix(GREA.out.versions)
 
     // ----------------------------------------------------
     // Perform enrichment analysis with GSEA
@@ -74,7 +77,9 @@ workflow ENRICHMENT {
         .set { ch_results_genewise_filtered }
 
     GPROFILER2_GOST(ch_results_genewise_filtered, ch_gmt, ch_background)
+    ch_versions = ch_versions.mix(GPROFILER2_GOST.out.versions)
 
     emit:
-    enriched = ch_enriched
+    enriched = ch_enriched  // channel: [ tsv ]
+    versions = ch_versions  // channel: [ versions.yml ]
 }
diff --git a/subworkflows/local/experimental/main.nf b/subworkflows/local/experimental/main.nf
index ce83ccf5..7244c172 100644
--- a/subworkflows/local/experimental/main.nf
+++ b/subworkflows/local/experimental/main.nf
@@ -33,6 +33,8 @@ workflow EXPERIMENTAL {
     ch_samplesheet  // [ meta, samplesheet ]
     ch_counts       // [ meta, counts]
     ch_tools        // [ pathway_name, differential_map, correlation_map, enrichment_map ]
+    ch_transcript_lengths  // [ meta, lengths ] - forwarded unchanged to DIFFERENTIAL
+    ch_control_features    // [ meta, control ] - forwarded unchanged to DIFFERENTIAL
 
     main:
 
@@ -54,8 +56,8 @@ workflow EXPERIMENTAL {
     ch_results_genewise = Channel.empty()               // differential results for genewise analysis - it should be a table
     ch_results_genewise_filtered = Channel.empty()      // differential results for genewise analysis - filtered - it should be a table
     ch_adjacency = Channel.empty()                      // adjacency matrix showing the connections between the genes, with values 1|0
-    ch_matrix = Channel.empty()                         // correlation matrix
-    ch_enriched = Channel.empty()                       // output table from enrichment analysis
+    ch_matrix    = Channel.empty()                      // correlation matrix
+    ch_enriched  = Channel.empty()                      // output table from enrichment analysis
 
     // ----------------------------------------------------
     // DIFFERENTIAL ANALYSIS BLOCK
@@ -66,7 +68,9 @@ workflow EXPERIMENTAL {
     DIFFERENTIAL(
         ch_counts_diff,
         ch_samplesheet,
-        ch_contrasts
+        ch_contrasts,
+        ch_transcript_lengths,
+        ch_control_features
     )
     ch_results_pairwise          = postprocess_subworkflow_output(DIFFERENTIAL.out.results_pairwise,["method", "args_diff"]).mix(ch_results_pairwise)
     ch_results_pairwise_filtered = postprocess_subworkflow_output(DIFFERENTIAL.out.results_pairwise_filtered,["method", "args_diff"]).mix(ch_results_pairwise_filtered)
@@ -113,4 +117,7 @@ workflow EXPERIMENTAL {
     // ----------------------------------------------------
 
     // TODO: call visualization stuff here
+
+    emit:
+    versions = ch_versions  // presumably channel: [ versions.yml ] - NOTE(review): this patch does not initialise or populate ch_versions here; confirm it is defined in main:
 }
diff --git a/workflows/differentialabundance.nf b/workflows/differentialabundance.nf
index 365ba850..216384bc 100644
--- a/workflows/differentialabundance.nf
+++ b/workflows/differentialabundance.nf
@@ -388,7 +388,9 @@ workflow DIFFERENTIALABUNDANCE {
             ch_contrasts,
             VALIDATOR.out.sample_meta,
             CUSTOM_MATRIXFILTER.out.filtered,
-            ch_tools
+            ch_tools,
+            ch_transcript_lengths,
+            ch_control_features
         )
 
         // TODO for the moment, these channels are allocated to not breaking the next part.