From c212056e866d83e383db0928d4c55ef082a040a8 Mon Sep 17 00:00:00 2001 From: Ryan Lim Date: Wed, 22 Dec 2021 01:57:46 +0000 Subject: [PATCH 1/5] add error function to python --- consensus-genome/run.wdl | 6 ++++-- consensus-genome/test/empty.bam | Bin 0 -> 146 bytes 2 files changed, 4 insertions(+), 2 deletions(-) create mode 100644 consensus-genome/test/empty.bam diff --git a/consensus-genome/run.wdl b/consensus-genome/run.wdl index b12521bd..05865299 100644 --- a/consensus-genome/run.wdl +++ b/consensus-genome/run.wdl @@ -897,18 +897,20 @@ task ComputeStats { import json import re import pysam + import sys from Bio import SeqIO import numpy as np from matplotlib import pyplot as plt import seaborn as sns - + + error = lambda err, cause: sys.exit(json.dumps(dict(wdl_error_message=True, error=err, cause=cause))) stats = {"sample_name": "~{sample}"} depths = open("~{prefix}samtools_depth.txt").read().splitlines() if depths: depths = np.array([int(d) for d in depths]) else: - raise Exception("Insufficient coverage to proceed with CG analysis") + error("InsufficientReadsError", "Insufficient coverage to proceed with CG analysis") stats["depth_avg"] = depths.mean() stats["depth_q.25"] = np.quantile(depths, .25) diff --git a/consensus-genome/test/empty.bam b/consensus-genome/test/empty.bam new file mode 100644 index 0000000000000000000000000000000000000000..4ddd52811c66498714b6b07dadbabc0c7e19c1c9 GIT binary patch literal 146 zcmb2|=3rp}f&Xj_PR>jWr3}SIUsBJcCL|PmCo008xYHC_M! literal 0 HcmV?d00001 From 3d8c1825b940acb6ddee46a4a3fa17b4c96fa71c Mon Sep 17 00:00:00 2001 From: Ryan Lim Date: Wed, 22 Dec 2021 19:08:34 +0000 Subject: [PATCH 2/5] add test --- consensus-genome/test/test_wdl.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/consensus-genome/test/test_wdl.py b/consensus-genome/test/test_wdl.py index 6c723038..ed6537c4 100644 --- a/consensus-genome/test/test_wdl.py +++ b/consensus-genome/test/test_wdl.py @@ -168,6 +168,28 @@ def test_fetch_sequence_by_expired_accession_id(self): self.assertRunFailed(ecm, task="FetchSequenceByAccessionId", error="AccessionIdNotFound", cause="Accession ID NO_ACCESSION_ID not found in the index") + def test_no_coverage_error(self): + ref_host_blank = os.path.join(os.path.dirname(__file__), "blank.fastq.gz") + assembly_blank = os.path.join(os.path.dirname(__file__), "blank.fastq.gz") + vcf_blank = os.path.join(os.path.dirname(__file__), "blank.fastq.gz") + fastqs_blank = os.path.join(os.path.dirname(__file__), "blank.fastq.gz") + cleaned_bam = os.path.join(os.path.dirname(__file__), "empty.bam") + with self.assertRaises(CalledProcessError) as ecm: + res = self.run_miniwdl(task="ComputeStats", args=[ + f"ref_host={ref_host_blank}", + f"assembly={assembly_blank}", + f"vcf={vcf_blank}", + f"fastqs={fastqs_blank}", + "technology=Illumina", + f"cleaned_bam={cleaned_bam}"], + task_input={ + "sample": "sample", + "prefix": "", + }) + self.assertRunFailed(ecm, task="ComputeStats", + error="InsufficientReadsError", cause="Insufficient coverage to proceed with CG analysis") + + def test_max_reads_illumina(self): fastq_0 = os.path.join(os.path.dirname(__file__), "SRR11741455_65054_nh_R1.fastq.gz") fastq_1 = os.path.join(os.path.dirname(__file__), "SRR11741455_65054_nh_R2.fastq.gz") From 3119387b7cceb850f3c04c6ee6d5a899f029a521 Mon Sep 17 00:00:00 2001 From: Ryan Lim Date: Wed, 22 Dec 2021 20:04:03 +0000 Subject: [PATCH 3/5] linting --- consensus-genome/test/test_wdl.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/consensus-genome/test/test_wdl.py b/consensus-genome/test/test_wdl.py index ed6537c4..e357882f 100644 --- a/consensus-genome/test/test_wdl.py +++ b/consensus-genome/test/test_wdl.py @@ -170,14 +170,14 @@ def test_fetch_sequence_by_expired_accession_id(self): def test_no_coverage_error(self): ref_host_blank = os.path.join(os.path.dirname(__file__), "blank.fastq.gz") - assembly_blank = os.path.join(os.path.dirname(__file__), "blank.fastq.gz") + assembly_blank = os.path.join(os.path.dirname(__file__), "blank.fastq.gz") vcf_blank = os.path.join(os.path.dirname(__file__), "blank.fastq.gz") fastqs_blank = os.path.join(os.path.dirname(__file__), "blank.fastq.gz") cleaned_bam = os.path.join(os.path.dirname(__file__), "empty.bam") with self.assertRaises(CalledProcessError) as ecm: - res = self.run_miniwdl(task="ComputeStats", args=[ - f"ref_host={ref_host_blank}", - f"assembly={assembly_blank}", + self.run_miniwdl(task="ComputeStats", args=[ + f"ref_host={ref_host_blank}", + f"assembly={assembly_blank}", f"vcf={vcf_blank}", f"fastqs={fastqs_blank}", "technology=Illumina", @@ -189,7 +189,6 @@ def test_no_coverage_error(self): self.assertRunFailed(ecm, task="ComputeStats", error="InsufficientReadsError", cause="Insufficient coverage to proceed with CG analysis") - def test_max_reads_illumina(self): fastq_0 = os.path.join(os.path.dirname(__file__), "SRR11741455_65054_nh_R1.fastq.gz") fastq_1 = os.path.join(os.path.dirname(__file__), "SRR11741455_65054_nh_R2.fastq.gz") From 44e081916a1e4a431e7d0d9a9a62f47748426c18 Mon Sep 17 00:00:00 2001 From: Ryan Lim Date: Mon, 10 Jan 2022 21:23:59 +0000 Subject: [PATCH 4/5] modify blast version --- consensus-genome/Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/consensus-genome/Dockerfile b/consensus-genome/Dockerfile index 3c29ba3e..1c897611 100644 --- a/consensus-genome/Dockerfile +++ b/consensus-genome/Dockerfile @@ -53,8 +53,8 @@ RUN apt-get -qq update && apt-get -qq -y install \ # These newer versions of infernal and ncbi-blast+ are required by VADR RUN curl -O -L https://ftp.osuosl.org/pub/ubuntu/pool/universe/i/infernal/infernal_1.1.4-1_amd64.deb && \ dpkg -i infernal_1.1.4-1_amd64.deb && \ - curl -O -L https://ftp.osuosl.org/pub/ubuntu/pool/universe/n/ncbi-blast+/ncbi-blast+_2.10.1-2_amd64.deb && \ - dpkg -i ncbi-blast+_2.10.1-2_amd64.deb + curl -O -L https://ftp.osuosl.org/pub/ubuntu/pool/universe/n/ncbi-blast+/ncbi-blast+_2.10.1+ds-1_amd64.deb && \ + dpkg -i ncbi-blast+_2.10.1+ds-1_amd64.deb # See https://github.com/ablab/quast/issues/157 RUN pip3 install multiqc==1.8 quast==5.0.2 && \ From 20de98b0c7df59b16552631afe79bb5bebee26b2 Mon Sep 17 00:00:00 2001 From: Ryan Lim Date: Mon, 10 Jan 2022 22:30:52 +0000 Subject: [PATCH 5/5] modify idseq reference --- short-read-mngs/test/postprocess/test_RunAssembly.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/short-read-mngs/test/postprocess/test_RunAssembly.py b/short-read-mngs/test/postprocess/test_RunAssembly.py index cb8164e2..e775fa14 100644 --- a/short-read-mngs/test/postprocess/test_RunAssembly.py +++ b/short-read-mngs/test/postprocess/test_RunAssembly.py @@ -10,10 +10,10 @@ def test_RunAssembly_defaults(util, short_read_mngs_bench3_viral_outputs): min_contig_length filter) """ assembly_contigs_fasta = short_read_mngs_bench3_viral_outputs["outputs"][ - "idseq_short_read_mngs.postprocess.assembly_out_assembly_contigs_fasta" + "czid_short_read_mngs.postprocess.assembly_out_assembly_contigs_fasta" ] assembly_contigs_all_fasta = short_read_mngs_bench3_viral_outputs["outputs"][ - "idseq_short_read_mngs.postprocess.assembly_out_assembly_contigs_all_fasta" + "czid_short_read_mngs.postprocess.assembly_out_assembly_contigs_all_fasta" ] assembly_contigs = list(SeqIO.parse(assembly_contigs_fasta, "fasta")) assembly_contigs_all = list(SeqIO.parse(assembly_contigs_all_fasta, "fasta"))