From 0bbfcd48a2829e5be340bbbb5e9f3efca6573560 Mon Sep 17 00:00:00 2001 From: Mark Tomko Date: Mon, 5 Feb 2024 11:44:05 -0800 Subject: [PATCH] Extra test for unexpected sequence writer + bugfix --- .../reports/UnexpectedSequenceWriter.scala | 2 +- .../unexpected-sequences/unexpected-AAAA.txt | 26 +++++++++ .../unexpected-sequences/unexpected-AAAT.txt | 0 .../unexpected-sequences/unexpected-CCCC.txt | 0 .../unexpected-sequences/unexpected-CCCG.txt | 26 +++++++++ .../reports/UnexpectedSequencesTest.scala | 55 ++++++++++++++----- 6 files changed, 94 insertions(+), 15 deletions(-) create mode 100644 src/test/resources/org/broadinstitute/gpp/poolq3/reports/unexpected-sequences/unexpected-AAAA.txt create mode 100644 src/test/resources/org/broadinstitute/gpp/poolq3/reports/unexpected-sequences/unexpected-AAAT.txt create mode 100644 src/test/resources/org/broadinstitute/gpp/poolq3/reports/unexpected-sequences/unexpected-CCCC.txt create mode 100644 src/test/resources/org/broadinstitute/gpp/poolq3/reports/unexpected-sequences/unexpected-CCCG.txt diff --git a/src/main/scala/org/broadinstitute/gpp/poolq3/reports/UnexpectedSequenceWriter.scala b/src/main/scala/org/broadinstitute/gpp/poolq3/reports/UnexpectedSequenceWriter.scala index d5c409c..4221ad7 100644 --- a/src/main/scala/org/broadinstitute/gpp/poolq3/reports/UnexpectedSequenceWriter.scala +++ b/src/main/scala/org/broadinstitute/gpp/poolq3/reports/UnexpectedSequenceWriter.scala @@ -60,7 +60,7 @@ object UnexpectedSequenceWriter { val ret = mutable.HashSet[String]() colReference.allBarcodes.foreach { dnaBarcode => val linesToRead: Int = - math.floor((unexpectedCountsByBarcode.getOrElse(dnaBarcode, 0) * samplePct).toDouble).toInt + math.ceil((unexpectedCountsByBarcode.getOrElse(dnaBarcode, 0) * samplePct).toDouble).toInt val file = cacheDir.resolve(nameFor(dnaBarcode)) if (Files.exists(file)) { Using.resource(Source.fromFile(file.toFile)) { src => diff --git a/src/test/resources/org/broadinstitute/gpp/poolq3/reports/unexpected-sequences/unexpected-AAAA.txt b/src/test/resources/org/broadinstitute/gpp/poolq3/reports/unexpected-sequences/unexpected-AAAA.txt new file mode 100644 index 0000000..83639ed --- /dev/null +++ b/src/test/resources/org/broadinstitute/gpp/poolq3/reports/unexpected-sequences/unexpected-AAAA.txt @@ -0,0 +1,26 @@ +TTTTTTTTTTTTTTTTTTTT +TTTTTTTTTTTTTTTTTTTT +TTTTTTTTTTTTTTTTTTTT +TTTTTTTTTTTTTTTTTTTT +TTTTTTTTTTTTTTTTTTTT +TTTTTTTTTTTTTTTTTTTT +TTTTTTTTTTTTTTTTTTTT +TTTTTTTTTTTTTTTTTTTT +TTTTTTTTTTTTTTTTTTTT +CCCCCCCCAAAAAAAAAAAA +CCCCCCCCAAAAAAAAAAAA +CCCCCCCCAAAAAAAAAAAA +CCCCCCCCAAAAAAAAAAAA +CCCCCCCCAAAAAAAAAAAA +CCCCCCCCAAAAAAAAAAAA +CCCCCCCCAAAAAAAAAAAA +CCCCCCCCAAAAAAAAAAAA +CCCCCCCCAAAAAAAAAAAA +CCCCCCCCAAAAAAAAAAAA +CCCCCCCCAAAAAAAAAAAA +CCCCCCCCAAAAAAAAAAAA +CCCCCCCCAAAAAAAAAAAA +CCCCCCCCAAAAAAAAAAAA +CCCCCCCCAAAAAAAAAAAA +CCCCCCCCAAAAAAAAAAAA +CCCCCCCCAAAAAAAAAAAA diff --git a/src/test/resources/org/broadinstitute/gpp/poolq3/reports/unexpected-sequences/unexpected-AAAT.txt b/src/test/resources/org/broadinstitute/gpp/poolq3/reports/unexpected-sequences/unexpected-AAAT.txt new file mode 100644 index 0000000..e69de29 diff --git a/src/test/resources/org/broadinstitute/gpp/poolq3/reports/unexpected-sequences/unexpected-CCCC.txt b/src/test/resources/org/broadinstitute/gpp/poolq3/reports/unexpected-sequences/unexpected-CCCC.txt new file mode 100644 index 0000000..e69de29 diff --git a/src/test/resources/org/broadinstitute/gpp/poolq3/reports/unexpected-sequences/unexpected-CCCG.txt b/src/test/resources/org/broadinstitute/gpp/poolq3/reports/unexpected-sequences/unexpected-CCCG.txt new file mode 100644 index 0000000..dd50d7a --- /dev/null +++ b/src/test/resources/org/broadinstitute/gpp/poolq3/reports/unexpected-sequences/unexpected-CCCG.txt @@ -0,0 +1,26 @@ +GATGTGCAGTGAGTAGCGAG +GATGTGCAGTGAGTAGCGAG +GATGTGCAGTGAGTAGCGAG +GATGTGCAGTGAGTAGCGAG +GATGTGCAGTGAGTAGCGAG +GATGTGCAGTGAGTAGCGAG +GATGTGCAGTGAGTAGCGAG +GATGTGCAGTGAGTAGCGAG +GATGTGCAGTGAGTAGCGAG +GGGGGGGGGGTTTTTTTTTT +GGGGGGGGGGTTTTTTTTTT +GGGGGGGGGGTTTTTTTTTT +GGGGGGGGGGTTTTTTTTTT +GGGGGGGGGGTTTTTTTTTT +GGGGGGGGGGTTTTTTTTTT +GGGGGGGGGGTTTTTTTTTT +GGGGGGGGGGTTTTTTTTTT +GGGGGGGGGGTTTTTTTTTT +GGGGGGGGGGTTTTTTTTTT +GGGGGGGGGGTTTTTTTTTT +GGGGGGGGGGTTTTTTTTTT +GGGGGGGGGGTTTTTTTTTT +GGGGGGGGGGTTTTTTTTTT +GGGGGGGGGGTTTTTTTTTT +GGGGGGGGGGTTTTTTTTTT +GGGGGGGGGGTTTTTTTTTT diff --git a/src/test/scala/org/broadinstitute/gpp/poolq3/reports/UnexpectedSequencesTest.scala b/src/test/scala/org/broadinstitute/gpp/poolq3/reports/UnexpectedSequencesTest.scala index a76850b..546ce88 100644 --- a/src/test/scala/org/broadinstitute/gpp/poolq3/reports/UnexpectedSequencesTest.scala +++ b/src/test/scala/org/broadinstitute/gpp/poolq3/reports/UnexpectedSequencesTest.scala @@ -12,13 +12,13 @@ import scala.util.{Random, Using} import better.files._ import munit.{FunSuite, Location} -import org.broadinstitute.gpp.poolq3.PoolQ import org.broadinstitute.gpp.poolq3.barcode.{Barcodes, FoundBarcode} import org.broadinstitute.gpp.poolq3.parser.{CloseableIterable, ReferenceEntry} import org.broadinstitute.gpp.poolq3.process.{ScoringConsumer, UnexpectedSequenceTracker} import org.broadinstitute.gpp.poolq3.reference.{ExactReference, VariantReference} +import org.broadinstitute.gpp.poolq3.{PoolQ, TestResources} -class UnexpectedSequencesTest extends FunSuite { +class UnexpectedSequencesTest extends FunSuite with TestResources { private[this] val rowReferenceBarcodes = List("AAAAAAAAAAAAAAAAAAAA", "AAAAAAAAAAAAAAAAAAAC", "AAAAAAAAAAAAAAAAAAAG", "AAAAAAAAAAAAAAAAAAAT").map(b => @@ -91,6 +91,40 @@ class UnexpectedSequencesTest extends FunSuite { } + test("read unexpected sequence cache") { + val cachePath = resourcePath("unexpected-sequences") + val outputFile = Files.createTempFile("unexpected", ".txt") + try { + val unexpectedReadCount = 9 + + UnexpectedSequenceWriter + .write( + outputFile, + cachePath, + Map("AAAA" -> unexpectedReadCount, "CCCG" -> unexpectedReadCount, "AAAT" -> 0, "CCCC" -> 0), + 100, + colReference, + Some(globalReference), + 0.02f + ) + .get + + val expected = + s"""Sequence\tTotal\tAAAA\tAAAT\tCCCC\tCCCG\tPotential IDs + |GATGTGCAGTGAGTAGCGAG\t$unexpectedReadCount\t0\t0\t0\t$unexpectedReadCount\tOh, that one + |TTTTTTTTTTTTTTTTTTTT\t$unexpectedReadCount\t$unexpectedReadCount\t0\t0\t0\t + |""".stripMargin + + Using.resource(Source.fromFile(outputFile.toFile)) { contents => + // now check the contents + val actual = contents.mkString + assertEquals(actual, expected) + } + } finally { + val _ = Files.deleteIfExists(outputFile) + } + } + private def testIt(underlyingBarcodes: List[(String, String)], samplePct: Float, unexpectedReadCount: Int)(implicit loc: Location ): Unit = { @@ -106,17 +140,11 @@ class UnexpectedSequencesTest extends FunSuite { new ScoringConsumer(rowReference, colReference, countAmbiguous = true, false, None, Some(ust), false) // run PoolQ and write the file - val _ = PoolQ.runProcess(barcodes, consumer) - - val _ = UnexpectedSequenceWriter.write( - outputFile, - cachePath, - ust.unexpectedBarcodeCounts, - 100, - colReference, - Some(globalReference), - samplePct - ) + val _ = PoolQ.runProcess(barcodes, consumer).get + + UnexpectedSequenceWriter + .write(outputFile, cachePath, ust.unexpectedBarcodeCounts, 100, colReference, Some(globalReference), samplePct) + .get val expected = s"""Sequence\tTotal\tAAAA\tAAAT\tCCCC\tCCCG\tPotential IDs @@ -129,7 +157,6 @@ class UnexpectedSequencesTest extends FunSuite { val actual = contents.mkString assertEquals(actual, expected) } - } finally { val _ = tmpPath.toFile.toScala.delete() }