From 0c220cf044bb5b06aa90350c19522ae6c2651e17 Mon Sep 17 00:00:00 2001
From: Matt Parker <matt.parker@nanoporetech.com>
Date: Tue, 16 May 2023 15:33:24 +0000
Subject: [PATCH] demo-cloud-url

---
 CHANGELOG.md                            |  4 ++
 bin/workflow_glue/check_sample_sheet.py | 50 +++++++++++++++----------
 lib/fastqingress.nf                     | 19 +++++++---
 nextflow.config                         |  2 +-
 nextflow_schema.json                    |  1 +
 5 files changed, 50 insertions(+), 26 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index e93970f..549e108 100755
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,10 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [v0.0.10]
+### Changed
+- Update schema to point to cloud demo
+
 ## [v0.0.9]
 ### Changed
 - Update licence to ONT Public License
diff --git a/bin/workflow_glue/check_sample_sheet.py b/bin/workflow_glue/check_sample_sheet.py
index 0f7d17b..2bd1779 100755
--- a/bin/workflow_glue/check_sample_sheet.py
+++ b/bin/workflow_glue/check_sample_sheet.py
@@ -11,7 +11,10 @@ def main(args):
 
     barcodes = []
     aliases = []
-    types = []
+    sample_types = []
+    allowed_sample_types = [
+        "test_sample", "positive_control", "negative_control", "no_template_control"
+        ]
 
     try:
         with open(args.sample_sheet, "r") as f:
@@ -31,45 +34,48 @@ def main(args):
                     barcodes.append(row["barcode"])
                 except KeyError:
                     sys.stdout.write("'barcode' column missing")
-                    exit()
+                    sys.exit()
                 try:
                     aliases.append(row["alias"])
                 except KeyError:
                     sys.stdout.write("'alias' column missing")
-                    exit()
+                    sys.exit()
                 try:
-                    types.append(row["type"])
+                    sample_types.append(row["type"])
                 except KeyError:
                     pass
     except Exception as e:
         sys.stdout.write(f"Parsing error: {e}")
-        exit()
+        sys.exit()
 
     # check barcode and alias values are unique
     if len(barcodes) > len(set(barcodes)):
         sys.stdout.write("values in 'barcode' column not unique")
-        exit()
+        sys.exit()
     if len(aliases) > len(set(aliases)):
         sys.stdout.write("values in 'alias' column not unique")
-        exit()
+        sys.exit()
 
-    if types:
+    if sample_types:
         # check if "type" column has unexpected values
-        unexp_type_vals = set(types) - set(
-            [
-                "test_sample",
-                "positive_control",
-                "negative_control",
-                "no_template_control",
-            ]
-        )
+        unexp_type_vals = set(sample_types) - set(allowed_sample_types)
+
         if unexp_type_vals:
             sys.stdout.write(
                 f"found unexpected values in 'type' column: {unexp_type_vals}. "
-                "allowed values are: `['test_sample', 'positive_control', "
-                "'negative_control', 'no_template_control']`"
+                f"Allowed values are: {allowed_sample_types}"
             )
-            exit()
+            sys.exit()
+
+        if args.required_sample_types:
+            for required_type in args.required_sample_types:
+                if required_type not in allowed_sample_types:
+                    sys.stdout.write(f"Not an allowed sample type: {required_type}")
+                    sys.exit()
+                if sample_types.count(required_type) < 1:
+                    sys.stdout.write(
+                        f"Sample sheet requires at least 1 of {required_type}")
+                    sys.exit()
 
     logger.info(f"Checked sample sheet {args.sample_sheet}.")
 
@@ -78,4 +84,10 @@ def argparser():
     """Argument parser for entrypoint."""
     parser = wf_parser("check_sample_sheet")
     parser.add_argument("sample_sheet", help="Sample sheet to check")
+    parser.add_argument(
+        "--required_sample_types",
+        help="List of required sample types. Each sample type provided must "
+             "appear at least once in the sample sheet",
+        nargs="*"
+    )
     return parser
diff --git a/lib/fastqingress.nf b/lib/fastqingress.nf
index e24048e..6fefbce 100644
--- a/lib/fastqingress.nf
+++ b/lib/fastqingress.nf
@@ -149,7 +149,7 @@ def watch_path(Map margs) {
         // add metadata from sample sheet (we can't use join here since it does not work
         // with repeated keys; we therefore need to transform the sample sheet data into
         // a map with the barcodes as keys)
-        def ch_sample_sheet = get_sample_sheet(file(margs.sample_sheet))
+        def ch_sample_sheet = get_sample_sheet(file(margs.sample_sheet), margs.required_sample_types)
         | collect
         | map { it.collectEntries { [(it["barcode"]): it] } }
         // now we can use this channel to annotate all files with the corresponding info
@@ -247,6 +247,7 @@ Map parse_arguments(Map arguments) {
                 "analyse_unclassified": false,
                 "fastcat_stats": false,
                 "fastcat_extra_args": "",
+                "required_sample_types": [],
                 "watch_path": false],
         name: "fastq_ingress")
     return parser.parse_args(arguments)
@@ -319,7 +320,7 @@ def get_valid_inputs(Map margs){
             // filter based on sample sheet in case one was provided
             if (margs.sample_sheet) {
                 // get channel of entries in the sample sheet
-                def ch_sample_sheet = get_sample_sheet(file(margs.sample_sheet))
+                def ch_sample_sheet = get_sample_sheet(file(margs.sample_sheet), margs.required_sample_types)
                 // get the union of both channels (missing values will be replaced with
                 // `null`)
                 def ch_union = Channel.fromPath(sub_dirs_with_fastq_files).map {
@@ -396,7 +397,7 @@ ArrayList get_fq_files_in_dir(Path dir) {
  * @param sample_sheet: path to the sample sheet CSV
  * @return: channel of maps (with values in sample sheet header as keys)
  */
-def get_sample_sheet(Path sample_sheet) {
+def get_sample_sheet(Path sample_sheet, ArrayList required_sample_types) {
     // If `validate_sample_sheet` does not return an error message, we can assume that
     // the sample sheet is valid and parse it. However, because of Nextflow's
     // asynchronous magic, we might emit values from `.splitCSV()` before the
@@ -405,7 +406,7 @@ def get_sample_sheet(Path sample_sheet) {
     // in STDOUT. Thus, we use the somewhat clunky construct with `concat` and `last`
     // below. This lets the CSV channel only start to emit once the error checking is
     // done.
-    ch_err = validate_sample_sheet(sample_sheet).map {
+    ch_err = validate_sample_sheet(sample_sheet, required_sample_types).map {
         // check if there was an error message
         if (it) error "Invalid sample sheet: ${it}."
         it
@@ -425,13 +426,19 @@ def get_sample_sheet(Path sample_sheet) {
  * message is emitted.
  *
  * @param: path to sample sheet CSV
+ * @param: list of sample types that must each appear in the sample sheet (may be empty)
  * @return: string (optional)
  */
 process validate_sample_sheet {
     label params.process_label
-    input: path csv
+    input: 
+        path csv
+        val required_sample_types
     output: stdout
+    script:
+    String req_types_arg = required_sample_types ? "--required_sample_types "+required_sample_types.join(" ") : ""
     """
-    workflow-glue check_sample_sheet $csv
+    workflow-glue check_sample_sheet $csv $req_types_arg
     """
 }
+
diff --git a/nextflow.config b/nextflow.config
index 07da562..d39cc62 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -59,7 +59,7 @@ manifest {
     description     = 'Influenza A&B typing and analysis from Nanopore data.'
     mainScript      = 'main.nf'
     nextflowVersion = '>=20.10.0'
-    version         = 'v0.0.9'
+    version         = 'v0.0.10'
 }
 
 epi2melabs {
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 4575ab5..ccc248f 100755
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -4,6 +4,7 @@
     "title": "epi2me-labs/wf-flu",
     "description": "Influenza A&B typing and analysis from Nanopore data.",
     "demo_url": "https://ont-exd-int-s3-euwst1-epi2me-labs.s3.amazonaws.com/wf-flu/wf-flu-demo.tar.gz",
+    "aws_demo_url": "https://ont-exd-int-s3-euwst1-epi2me-labs.s3.amazonaws.com/wf-flu/wf-flu-demo/aws.nextflow.config",
     "url": "https://github.com/epi2me-labs/wf-flu",
     "type": "object",
     "definitions": {