From c212056e866d83e383db0928d4c55ef082a040a8 Mon Sep 17 00:00:00 2001
From: Ryan Lim <rlim@chanzuckerberg.com>
Date: Wed, 22 Dec 2021 01:57:46 +0000
Subject: [PATCH 1/5] ComputeStats: exit with a JSON WDL error when depths are empty

---
 consensus-genome/run.wdl        |   6 ++++--
 consensus-genome/test/empty.bam | Bin 0 -> 146 bytes
 2 files changed, 4 insertions(+), 2 deletions(-)
 create mode 100644 consensus-genome/test/empty.bam

diff --git a/consensus-genome/run.wdl b/consensus-genome/run.wdl
index b12521bd..05865299 100644
--- a/consensus-genome/run.wdl
+++ b/consensus-genome/run.wdl
@@ -897,18 +897,20 @@ task ComputeStats {
         import json
         import re
         import pysam
+        import sys
         from Bio import SeqIO
         import numpy as np
         from matplotlib import pyplot as plt
         import seaborn as sns
-
+        
+        error = lambda err, cause: sys.exit(json.dumps(dict(wdl_error_message=True, error=err, cause=cause)))
         stats = {"sample_name": "~{sample}"}
 
         depths = open("~{prefix}samtools_depth.txt").read().splitlines()
         if depths:
             depths = np.array([int(d) for d in depths])
         else:
-            raise Exception("Insufficient coverage to proceed with CG analysis")
+            error("InsufficientReadsError", "Insufficient coverage to proceed with CG analysis")
 
         stats["depth_avg"] = depths.mean()
         stats["depth_q.25"] = np.quantile(depths, .25)
diff --git a/consensus-genome/test/empty.bam b/consensus-genome/test/empty.bam
new file mode 100644
index 0000000000000000000000000000000000000000..4ddd52811c66498714b6b07dadbabc0c7e19c1c9
GIT binary patch
literal 146
zcmb2|=3rp}f&Xj_PR>jWr3}SIUsBJcCL|PmC<tU|G_copwl!vLO{ib}<j<c)Q_k!;
zGATr_{ei=VoDK7*uAjfOv2|(Z&UuCt-ak%HP07qmIP+BC318TQmq}Bf&yio2osy6t
hncot7V*+=EJTpVbz75=!K+ELOY?WqU20H>o008xYHC_M!

literal 0
HcmV?d00001


From 3d8c1825b940acb6ddee46a4a3fa17b4c96fa71c Mon Sep 17 00:00:00 2001
From: Ryan Lim <rlim@chanzuckerberg.com>
Date: Wed, 22 Dec 2021 19:08:34 +0000
Subject: [PATCH 2/5] add ComputeStats test for InsufficientReadsError on empty BAM

---
 consensus-genome/test/test_wdl.py | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/consensus-genome/test/test_wdl.py b/consensus-genome/test/test_wdl.py
index 6c723038..ed6537c4 100644
--- a/consensus-genome/test/test_wdl.py
+++ b/consensus-genome/test/test_wdl.py
@@ -168,6 +168,28 @@ def test_fetch_sequence_by_expired_accession_id(self):
         self.assertRunFailed(ecm, task="FetchSequenceByAccessionId",
                              error="AccessionIdNotFound", cause="Accession ID NO_ACCESSION_ID not found in the index")
 
+    def test_no_coverage_error(self):
+        ref_host_blank = os.path.join(os.path.dirname(__file__), "blank.fastq.gz")
+        assembly_blank = os.path.join(os.path.dirname(__file__), "blank.fastq.gz") 
+        vcf_blank = os.path.join(os.path.dirname(__file__), "blank.fastq.gz")
+        fastqs_blank = os.path.join(os.path.dirname(__file__), "blank.fastq.gz")
+        cleaned_bam = os.path.join(os.path.dirname(__file__), "empty.bam")
+        with self.assertRaises(CalledProcessError) as ecm:
+            res = self.run_miniwdl(task="ComputeStats", args=[
+                f"ref_host={ref_host_blank}", 
+                f"assembly={assembly_blank}", 
+                f"vcf={vcf_blank}",
+                f"fastqs={fastqs_blank}",
+                "technology=Illumina",
+                f"cleaned_bam={cleaned_bam}"],
+                task_input={
+                    "sample": "sample",
+                    "prefix": "",
+                })
+        self.assertRunFailed(ecm, task="ComputeStats",
+                             error="InsufficientReadsError", cause="Insufficient coverage to proceed with CG analysis")
+
+
     def test_max_reads_illumina(self):
         fastq_0 = os.path.join(os.path.dirname(__file__), "SRR11741455_65054_nh_R1.fastq.gz")
         fastq_1 = os.path.join(os.path.dirname(__file__), "SRR11741455_65054_nh_R2.fastq.gz")

From 3119387b7cceb850f3c04c6ee6d5a899f029a521 Mon Sep 17 00:00:00 2001
From: Ryan Lim <rlim@chanzuckerberg.com>
Date: Wed, 22 Dec 2021 20:04:03 +0000
Subject: [PATCH 3/5] lint test_no_coverage_error (trailing whitespace, unused variable)

---
 consensus-genome/test/test_wdl.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/consensus-genome/test/test_wdl.py b/consensus-genome/test/test_wdl.py
index ed6537c4..e357882f 100644
--- a/consensus-genome/test/test_wdl.py
+++ b/consensus-genome/test/test_wdl.py
@@ -170,14 +170,14 @@ def test_fetch_sequence_by_expired_accession_id(self):
 
     def test_no_coverage_error(self):
         ref_host_blank = os.path.join(os.path.dirname(__file__), "blank.fastq.gz")
-        assembly_blank = os.path.join(os.path.dirname(__file__), "blank.fastq.gz") 
+        assembly_blank = os.path.join(os.path.dirname(__file__), "blank.fastq.gz")
         vcf_blank = os.path.join(os.path.dirname(__file__), "blank.fastq.gz")
         fastqs_blank = os.path.join(os.path.dirname(__file__), "blank.fastq.gz")
         cleaned_bam = os.path.join(os.path.dirname(__file__), "empty.bam")
         with self.assertRaises(CalledProcessError) as ecm:
-            res = self.run_miniwdl(task="ComputeStats", args=[
-                f"ref_host={ref_host_blank}", 
-                f"assembly={assembly_blank}", 
+            self.run_miniwdl(task="ComputeStats", args=[
+                f"ref_host={ref_host_blank}",
+                f"assembly={assembly_blank}",
                 f"vcf={vcf_blank}",
                 f"fastqs={fastqs_blank}",
                 "technology=Illumina",
@@ -189,7 +189,6 @@ def test_no_coverage_error(self):
         self.assertRunFailed(ecm, task="ComputeStats",
                              error="InsufficientReadsError", cause="Insufficient coverage to proceed with CG analysis")
 
-
     def test_max_reads_illumina(self):
         fastq_0 = os.path.join(os.path.dirname(__file__), "SRR11741455_65054_nh_R1.fastq.gz")
         fastq_1 = os.path.join(os.path.dirname(__file__), "SRR11741455_65054_nh_R2.fastq.gz")

From 44e081916a1e4a431e7d0d9a9a62f47748426c18 Mon Sep 17 00:00:00 2001
From: Ryan Lim <rlim@chanzuckerberg.com>
Date: Mon, 10 Jan 2022 21:23:59 +0000
Subject: [PATCH 4/5] switch ncbi-blast+ .deb from 2.10.1-2 to 2.10.1+ds-1

---
 consensus-genome/Dockerfile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/consensus-genome/Dockerfile b/consensus-genome/Dockerfile
index 3c29ba3e..1c897611 100644
--- a/consensus-genome/Dockerfile
+++ b/consensus-genome/Dockerfile
@@ -53,8 +53,8 @@ RUN apt-get -qq update && apt-get -qq -y install \
 # These newer versions of infernal and ncbi-blast+ are required by VADR
 RUN curl -O -L https://ftp.osuosl.org/pub/ubuntu/pool/universe/i/infernal/infernal_1.1.4-1_amd64.deb && \
   dpkg -i infernal_1.1.4-1_amd64.deb && \
-  curl -O -L https://ftp.osuosl.org/pub/ubuntu/pool/universe/n/ncbi-blast+/ncbi-blast+_2.10.1-2_amd64.deb && \
-  dpkg -i ncbi-blast+_2.10.1-2_amd64.deb
+  curl -O -L https://ftp.osuosl.org/pub/ubuntu/pool/universe/n/ncbi-blast+/ncbi-blast+_2.10.1+ds-1_amd64.deb && \
+  dpkg -i ncbi-blast+_2.10.1+ds-1_amd64.deb
 
 # See https://github.com/ablab/quast/issues/157
 RUN pip3 install multiqc==1.8 quast==5.0.2 && \

From 20de98b0c7df59b16552631afe79bb5bebee26b2 Mon Sep 17 00:00:00 2001
From: Ryan Lim <rlim@chanzuckerberg.com>
Date: Mon, 10 Jan 2022 22:30:52 +0000
Subject: [PATCH 5/5] test_RunAssembly: rename idseq_ output keys to czid_

---
 short-read-mngs/test/postprocess/test_RunAssembly.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/short-read-mngs/test/postprocess/test_RunAssembly.py b/short-read-mngs/test/postprocess/test_RunAssembly.py
index cb8164e2..e775fa14 100644
--- a/short-read-mngs/test/postprocess/test_RunAssembly.py
+++ b/short-read-mngs/test/postprocess/test_RunAssembly.py
@@ -10,10 +10,10 @@ def test_RunAssembly_defaults(util, short_read_mngs_bench3_viral_outputs):
     min_contig_length filter)
     """
     assembly_contigs_fasta = short_read_mngs_bench3_viral_outputs["outputs"][
-        "idseq_short_read_mngs.postprocess.assembly_out_assembly_contigs_fasta"
+        "czid_short_read_mngs.postprocess.assembly_out_assembly_contigs_fasta"
     ]
     assembly_contigs_all_fasta = short_read_mngs_bench3_viral_outputs["outputs"][
-        "idseq_short_read_mngs.postprocess.assembly_out_assembly_contigs_all_fasta"
+        "czid_short_read_mngs.postprocess.assembly_out_assembly_contigs_all_fasta"
     ]
     assembly_contigs = list(SeqIO.parse(assembly_contigs_fasta, "fasta"))
     assembly_contigs_all = list(SeqIO.parse(assembly_contigs_all_fasta, "fasta"))