Skip to content

Commit

Permalink
Merge branch 'demo-cloud-url' into 'dev'
Browse files Browse the repository at this point in the history
demo-cloud-url

See merge request epi2melabs/workflows/wf-flu!43
  • Loading branch information
mattdmem committed May 16, 2023
2 parents a21c1b8 + 0c220cf commit df66cbf
Show file tree
Hide file tree
Showing 5 changed files with 50 additions and 26 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [v0.0.10]
### Updated
- Updated schema to point to the cloud-hosted demo

## [v0.0.9]
### Changed
- Update licence to ONT Public License
Expand Down
50 changes: 31 additions & 19 deletions bin/workflow_glue/check_sample_sheet.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,10 @@ def main(args):

barcodes = []
aliases = []
types = []
sample_types = []
allowed_sample_types = [
"test_sample", "positive_control", "negative_control", "no_template_control"
]

try:
with open(args.sample_sheet, "r") as f:
Expand All @@ -31,45 +34,48 @@ def main(args):
barcodes.append(row["barcode"])
except KeyError:
sys.stdout.write("'barcode' column missing")
exit()
sys.exit()
try:
aliases.append(row["alias"])
except KeyError:
sys.stdout.write("'alias' column missing")
exit()
sys.exit()
try:
types.append(row["type"])
sample_types.append(row["type"])
except KeyError:
pass
except Exception as e:
sys.stdout.write(f"Parsing error: {e}")
exit()
sys.exit()

# check barcode and alias values are unique
if len(barcodes) > len(set(barcodes)):
sys.stdout.write("values in 'barcode' column not unique")
exit()
sys.exit()
if len(aliases) > len(set(aliases)):
sys.stdout.write("values in 'alias' column not unique")
exit()
sys.exit()

if types:
if sample_types:
# check if "type" column has unexpected values
unexp_type_vals = set(types) - set(
[
"test_sample",
"positive_control",
"negative_control",
"no_template_control",
]
)
unexp_type_vals = set(sample_types) - set(allowed_sample_types)

if unexp_type_vals:
sys.stdout.write(
f"found unexpected values in 'type' column: {unexp_type_vals}. "
"allowed values are: `['test_sample', 'positive_control', "
"'negative_control', 'no_template_control']`"
f"Allowed values are: {allowed_sample_types}"
)
exit()
sys.exit()

if args.required_sample_types:
for required_type in args.required_sample_types:
if required_type not in allowed_sample_types:
sys.stdout.write(f"Not an allowed sample type: {required_type}")
sys.exit()
if sample_types.count(required_type) < 1:
sys.stdout.write(
f"Sample sheet requires at least 1 of {required_type}")
sys.exit()

logger.info(f"Checked sample sheet {args.sample_sheet}.")

Expand All @@ -78,4 +84,10 @@ def argparser():
"""Argument parser for entrypoint."""
parser = wf_parser("check_sample_sheet")
parser.add_argument("sample_sheet", help="Sample sheet to check")
parser.add_argument(
"--required_sample_types",
help="List of required sample types. Each sample type provided must "
"appear at least once in the sample sheet",
nargs="*"
)
return parser
19 changes: 13 additions & 6 deletions lib/fastqingress.nf
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ def watch_path(Map margs) {
// add metadata from sample sheet (we can't use join here since it does not work
// with repeated keys; we therefore need to transform the sample sheet data into
// a map with the barcodes as keys)
def ch_sample_sheet = get_sample_sheet(file(margs.sample_sheet))
def ch_sample_sheet = get_sample_sheet(file(margs.sample_sheet), margs.required_sample_types)
| collect
| map { it.collectEntries { [(it["barcode"]): it] } }
// now we can use this channel to annotate all files with the corresponding info
Expand Down Expand Up @@ -247,6 +247,7 @@ Map parse_arguments(Map arguments) {
"analyse_unclassified": false,
"fastcat_stats": false,
"fastcat_extra_args": "",
"required_sample_types": [],
"watch_path": false],
name: "fastq_ingress")
return parser.parse_args(arguments)
Expand Down Expand Up @@ -319,7 +320,7 @@ def get_valid_inputs(Map margs){
// filter based on sample sheet in case one was provided
if (margs.sample_sheet) {
// get channel of entries in the sample sheet
def ch_sample_sheet = get_sample_sheet(file(margs.sample_sheet))
def ch_sample_sheet = get_sample_sheet(file(margs.sample_sheet), margs.required_sample_types)
// get the union of both channels (missing values will be replaced with
// `null`)
def ch_union = Channel.fromPath(sub_dirs_with_fastq_files).map {
Expand Down Expand Up @@ -396,7 +397,7 @@ ArrayList get_fq_files_in_dir(Path dir) {
* @param sample_sheet: path to the sample sheet CSV
* @return: channel of maps (with values in sample sheet header as keys)
*/
def get_sample_sheet(Path sample_sheet) {
def get_sample_sheet(Path sample_sheet, ArrayList required_sample_types) {
// If `validate_sample_sheet` does not return an error message, we can assume that
// the sample sheet is valid and parse it. However, because of Nextflow's
// asynchronous magic, we might emit values from `.splitCSV()` before the
Expand All @@ -405,7 +406,7 @@ def get_sample_sheet(Path sample_sheet) {
// in STDOUT. Thus, we use the somewhat clunky construct with `concat` and `last`
// below. This lets the CSV channel only start to emit once the error checking is
// done.
ch_err = validate_sample_sheet(sample_sheet).map {
ch_err = validate_sample_sheet(sample_sheet, required_sample_types).map {
// check if there was an error message
if (it) error "Invalid sample sheet: ${it}."
it
Expand All @@ -425,13 +426,19 @@ def get_sample_sheet(Path sample_sheet) {
* message is emitted.
*
* @param: path to sample sheet CSV
* @param: list of required sample types (optional)
* @return: string (optional)
*/
process validate_sample_sheet {
label params.process_label
input: path csv
input:
path csv
val required_sample_types
output: stdout
script:
String req_types_arg = required_sample_types ? "--required_sample_types "+required_sample_types.join(" ") : ""
"""
workflow-glue check_sample_sheet $csv
workflow-glue check_sample_sheet $csv $req_types_arg
"""
}

2 changes: 1 addition & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ manifest {
description = 'Influenza A&B typing and analysis from Nanopore data.'
mainScript = 'main.nf'
nextflowVersion = '>=20.10.0'
version = 'v0.0.9'
version = 'v0.0.10'
}

epi2melabs {
Expand Down
1 change: 1 addition & 0 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
"title": "epi2me-labs/wf-flu",
"description": "Influenza A&B typing and analysis from Nanopore data.",
"demo_url": "https://ont-exd-int-s3-euwst1-epi2me-labs.s3.amazonaws.com/wf-flu/wf-flu-demo.tar.gz",
"aws_demo_url": "https://ont-exd-int-s3-euwst1-epi2me-labs.s3.amazonaws.com/wf-flu/wf-flu-demo/aws.nextflow.config",
"url": "https://github.com/epi2me-labs/wf-flu",
"type": "object",
"definitions": {
Expand Down

0 comments on commit df66cbf

Please sign in to comment.