From 0c220cf044bb5b06aa90350c19522ae6c2651e17 Mon Sep 17 00:00:00 2001 From: Matt Parker Date: Tue, 16 May 2023 15:33:24 +0000 Subject: [PATCH] demo-cloud-url --- CHANGELOG.md | 4 ++ bin/workflow_glue/check_sample_sheet.py | 50 +++++++++++++++---------- lib/fastqingress.nf | 19 +++++++--- nextflow.config | 2 +- nextflow_schema.json | 1 + 5 files changed, 50 insertions(+), 26 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e93970f..549e108 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,10 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [v0.0.10] +### Updated +- Update to schema to point to cloud demo + ## [v0.0.9] ### Changed - Update licence to ONT Public License diff --git a/bin/workflow_glue/check_sample_sheet.py b/bin/workflow_glue/check_sample_sheet.py index 0f7d17b..2bd1779 100755 --- a/bin/workflow_glue/check_sample_sheet.py +++ b/bin/workflow_glue/check_sample_sheet.py @@ -11,7 +11,10 @@ def main(args): barcodes = [] aliases = [] - types = [] + sample_types = [] + allowed_sample_types = [ + "test_sample", "positive_control", "negative_control", "no_template_control" + ] try: with open(args.sample_sheet, "r") as f: @@ -31,45 +34,48 @@ def main(args): barcodes.append(row["barcode"]) except KeyError: sys.stdout.write("'barcode' column missing") - exit() + sys.exit() try: aliases.append(row["alias"]) except KeyError: sys.stdout.write("'alias' column missing") - exit() + sys.exit() try: - types.append(row["type"]) + sample_types.append(row["type"]) except KeyError: pass except Exception as e: sys.stdout.write(f"Parsing error: {e}") - exit() + sys.exit() # check barcode and alias values are unique if len(barcodes) > len(set(barcodes)): sys.stdout.write("values in 'barcode' column not unique") - exit() + sys.exit() if len(aliases) > len(set(aliases)): sys.stdout.write("values in 'alias' column not unique") - exit() + sys.exit() - if types: + if sample_types: # check if "type" column has unexpected values - unexp_type_vals = set(types) - set( - [ - "test_sample", - "positive_control", - "negative_control", - "no_template_control", - ] - ) + unexp_type_vals = set(sample_types) - set(allowed_sample_types) + if unexp_type_vals: sys.stdout.write( f"found unexpected values in 'type' column: {unexp_type_vals}. " - "allowed values are: `['test_sample', 'positive_control', " - "'negative_control', 'no_template_control']`" + f"Allowed values are: {allowed_sample_types}" ) - exit() + sys.exit() + + if args.required_sample_types: + for required_type in args.required_sample_types: + if required_type not in allowed_sample_types: + sys.stdout.write(f"Not an allowed sample type: {required_type}") + sys.exit() + if sample_types.count(required_type) < 1: + sys.stdout.write( + f"Sample sheet requires at least 1 of {required_type}") + sys.exit() logger.info(f"Checked sample sheet {args.sample_sheet}.") @@ -78,4 +84,10 @@ def argparser(): """Argument parser for entrypoint.""" parser = wf_parser("check_sample_sheet") parser.add_argument("sample_sheet", help="Sample sheet to check") + parser.add_argument( + "--required_sample_types", + help="List of required sample types. Each sample type provided must " + "appear at least once in the sample sheet", + nargs="*" + ) return parser diff --git a/lib/fastqingress.nf b/lib/fastqingress.nf index e24048e..6fefbce 100644 --- a/lib/fastqingress.nf +++ b/lib/fastqingress.nf @@ -149,7 +149,7 @@ def watch_path(Map margs) { // add metadata from sample sheet (we can't use join here since it does not work // with repeated keys; we therefore need to transform the sample sheet data into // a map with the barcodes as keys) - def ch_sample_sheet = get_sample_sheet(file(margs.sample_sheet)) + def ch_sample_sheet = get_sample_sheet(file(margs.sample_sheet), margs.required_sample_types) | collect | map { it.collectEntries { [(it["barcode"]): it] } } // now we can use this channel to annotate all files with the corresponding info @@ -247,6 +247,7 @@ Map parse_arguments(Map arguments) { "analyse_unclassified": false, "fastcat_stats": false, "fastcat_extra_args": "", + "required_sample_types": [], "watch_path": false], name: "fastq_ingress") return parser.parse_args(arguments) @@ -319,7 +320,7 @@ def get_valid_inputs(Map margs){ // filter based on sample sheet in case one was provided if (margs.sample_sheet) { // get channel of entries in the sample sheet - def ch_sample_sheet = get_sample_sheet(file(margs.sample_sheet)) + def ch_sample_sheet = get_sample_sheet(file(margs.sample_sheet), margs.required_sample_types) // get the union of both channels (missing values will be replaced with // `null`) def ch_union = Channel.fromPath(sub_dirs_with_fastq_files).map { @@ -396,7 +397,7 @@ ArrayList get_fq_files_in_dir(Path dir) { * @param sample_sheet: path to the sample sheet CSV * @return: channel of maps (with values in sample sheet header as keys) */ -def get_sample_sheet(Path sample_sheet) { +def get_sample_sheet(Path sample_sheet, ArrayList required_sample_types) { // If `validate_sample_sheet` does not return an error message, we can assume that // the sample sheet is valid and parse it. However, because of Nextflow's // asynchronous magic, we might emit values from `.splitCSV()` before the @@ -405,7 +406,7 @@ def get_sample_sheet(Path sample_sheet) { // in STDOUT. Thus, we use the somewhat clunky construct with `concat` and `last` // below. This lets the CSV channel only start to emit once the error checking is // done. - ch_err = validate_sample_sheet(sample_sheet).map { + ch_err = validate_sample_sheet(sample_sheet, required_sample_types).map { // check if there was an error message if (it) error "Invalid sample sheet: ${it}." it @@ -425,13 +426,19 @@ def get_sample_sheet(Path sample_sheet) { * message is emitted. * * @param: path to sample sheet CSV + * @param: list of required sample types (optional) * @return: string (optional) */ process validate_sample_sheet { label params.process_label - input: path csv + input: + path csv + val required_sample_types output: stdout + script: + String req_types_arg = required_sample_types ? "--required_sample_types "+required_sample_types.join(" ") : "" """ - workflow-glue check_sample_sheet $csv + workflow-glue check_sample_sheet $csv $req_types_arg """ } + diff --git a/nextflow.config b/nextflow.config index 07da562..d39cc62 100644 --- a/nextflow.config +++ b/nextflow.config @@ -59,7 +59,7 @@ manifest { description = 'Influenza A&B typing and analysis from Nanopore data.' mainScript = 'main.nf' nextflowVersion = '>=20.10.0' - version = 'v0.0.9' + version = 'v0.0.10' } epi2melabs { diff --git a/nextflow_schema.json b/nextflow_schema.json index 4575ab5..ccc248f 100755 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -4,6 +4,7 @@ "title": "epi2me-labs/wf-flu", "description": "Influenza A&B typing and analysis from Nanopore data.", "demo_url": "https://ont-exd-int-s3-euwst1-epi2me-labs.s3.amazonaws.com/wf-flu/wf-flu-demo.tar.gz", + "aws_demo_url": "https://ont-exd-int-s3-euwst1-epi2me-labs.s3.amazonaws.com/wf-flu/wf-flu-demo/aws.nextflow.config", "url": "https://github.com/epi2me-labs/wf-flu", "type": "object", "definitions": {