From 2d509f93995fb843862974d661b43b1b6c390761 Mon Sep 17 00:00:00 2001 From: jamesnemesh Date: Thu, 11 Apr 2024 13:07:02 -0400 Subject: [PATCH] SpermSeqMarkDuplicatesTest to demonstrate multiple reads overlapping a locus. (#411) --- .../SpermSeqMarkDuplicatesTest.java | 25 +++++++++++++++++- .../metrics/duplicates/test_overlap.bam | Bin 0 -> 4590 bytes 2 files changed, 24 insertions(+), 1 deletion(-) create mode 100644 testdata/org/broadinstitute/spermseq/metrics/duplicates/test_overlap.bam diff --git a/src/tests/java/org/broadinstitute/dropseqrna/spermseq/metrics/duplicates/SpermSeqMarkDuplicatesTest.java b/src/tests/java/org/broadinstitute/dropseqrna/spermseq/metrics/duplicates/SpermSeqMarkDuplicatesTest.java index 58ea7b5f..d6512889 100644 --- a/src/tests/java/org/broadinstitute/dropseqrna/spermseq/metrics/duplicates/SpermSeqMarkDuplicatesTest.java +++ b/src/tests/java/org/broadinstitute/dropseqrna/spermseq/metrics/duplicates/SpermSeqMarkDuplicatesTest.java @@ -43,7 +43,7 @@ public class SpermSeqMarkDuplicatesTest { File INPUT = new File ("testdata/org/broadinstitute/spermseq/metrics/duplicates/test_sorted.bam"); - + File INPUT_SIMPLE= new File ("testdata/org/broadinstitute/spermseq/metrics/duplicates/test_overlap.bam"); @Test // tests which reads are marked as duplicates by the read position strategy. public void testDetectDuplicatesByReadPositionStrategy() throws IOException { @@ -104,6 +104,29 @@ public void testMetrics () { } + /** + * Test the simple case where the reads overlap. + */ + @Test + public void testSimpleOverlap () throws IOException { + // there should be no dupes in this file. + Set dupes = new HashSet(); + + SpermSeqMarkDuplicates d = new SpermSeqMarkDuplicates(); + d.INPUT= Collections.singletonList(INPUT_SIMPLE); + d.OUTPUT=File.createTempFile("testDetectDuplicatesByReadPositionStrategy.", ".bam"); + d.OUTPUT.deleteOnExit(); + d.OUTPUT_STATS=File.createTempFile("testDetectDuplicatesByReadPositionStrategy.", ".pcr_duplicate_metrics"); + d.OUTPUT_STATS.deleteOnExit(); + Assert.assertEquals(0, d.doWork()); + + SamReader inputSam = SamReaderFactory.makeDefault().open(d.OUTPUT); + for (SAMRecord r: inputSam) { + boolean duplicateReadFlag = r.getDuplicateReadFlag(); + Assert.assertFalse(duplicateReadFlag); + } + } + } diff --git a/testdata/org/broadinstitute/spermseq/metrics/duplicates/test_overlap.bam b/testdata/org/broadinstitute/spermseq/metrics/duplicates/test_overlap.bam new file mode 100644 index 0000000000000000000000000000000000000000..d01a5033a8c9646c998ceda9949a94690b4ecada GIT binary patch literal 4590 zcmV)KPDs;tkN z1$`R?Wxy954HEp;KIhcbZaKYu$xcIaR{gd1+TZ#2|9}7Y@3rc7Z~K|IZdKK-mo}H~ z?yS)ju{7LW3#X^!c=XUs(v4fgmzRe4#nRT!8WWaDq8ME|++TZRGC5mYU48uc@roax zI!r&G98E4JY2|c$e>G3eR?jZ{>UsLa>Nw>zP7gymUw!HHC{>B?zFKx>;>KXIagqXZ=mJ<=YjKUD6oRO57rQzOMS#EhaU2cw) zO_6eHE!VsYq>e-jNYN1@2#G0BVvU2Vqzjs|vfnCCXTho4ROp#I0py#!>6&BBZiLYKWzhsZM-K zv>;UT=xxrCDG@a@nKNyK5i&%*2&t-(Y6_LjBSHC(A=C6CHE7p;yy39WTtNNTw-qfCudD6RyOstcr6Wl6zLNTVbu z2}((-toAWQX6 zodl{iCNPr-A<2lqRFsl&xHHq_SfX(uR62u7NO=t1KnJ^7N^5h)bF|0gEDNsez!TJQ1%MO8Y|kXLY4g z962RfH`)(k-zr5Q`BQc-bIuG6BudmNsYqUkv@TS_P-Z(Ke`d9HiWup0LTTkLLbynl zxuPj*vDl||osty&msFipRHvzN0m*|po?~!^OC1YrD;TA5A<}zOl`5GgvK!^E6G>>A z(7!?OX-MhBY4pL6z)*Y^BYkC>Q0PuOaw*NO)^3uxOe|LuM)b-wtC;Gfnr3b&T0EMm<)G7m zwv=cVZ8A{Qd2!!2Ma$F_Et)^l3H{SROSz{xfb9ty7O$KTI-?zo0I^X0&EVP@Tni%Z z0xrsu0Ij0yM^e#fl7?JCu!^9GLJ#nCxSUSmqT?~0WSDox7#6^*r4TS;NRp_u5LQSY z<668+$sDdSoy0C00ZNq90y>ZgbfF+y8WM9Tw`dF&YJr(|XTgBxg^5lyHj0--->M0N z)D9v$8bqQp8^>%QEvd%k3@*aG6Sx!wjLG2eTtoIDX0HNE-ZNpdRfe?i*erHYnY$5& zz^9W=0eS`X)}x7qhP6;IMo5uT3+)O$hby{y;{ryg`O|wHGYwKBYl2f~6urx!vKF}W zU??wYI!QI_gQ6DB3>ov3qSt@0?LGII&FYmIloYl-j`8V|U|yZq9-P|KafE~g$^4BVhe2nIUP zD35xk0~emQ5*eE{6NHjZ^e$1sXXr50(`YQ%*FohZ?;<*Hw7zYMOw8fRZZ;Dv&(<2r zl~>eBl_A%&(3~-6q1Huam^a62gz5V=wF(#WD!qxeY)2y_A?p-SsEy-Vk>8E7}Lo3dcgI59{kWI!;69VhR*|I0k!u0PqfL?VdF)EtP3-ENq9V*1XQE;`8)CdJM)&=1La!NPF{@&1+l;xn)l2SdJ3C7 zN-=>-=hQmYQYzRyex(GLbfVGGBWBPo)Qxt@NS$ralAz8}>78$D*ybA)($a2rt1_A` zYZ7t{LBLcp!r|Gd| zl-)MwTJX;E4Oe!vDXgP&rK3=YUSo{V-g?fwRU#?I z7ngX^40W^3#+VS4TNAAfE-TFw0@~YhZos6_vTsG_`KB%HW@{x|ty7GYdl%4w1rHS= z93HNY-cjG;UnVZ`D%*|zm7Znjo2(EOo>ArmJmrYaLXyN<;+M_qg&{(A62IKqm>uEh zFBtCcykyP@m344jdj530)XwAAFk`#fu81}TLNwaA2p&ryV!=Hj5N86-d|UjoIbLOC zC-Ey)c>$%$X`JYzvJ|>EM+G&((IJ_({^AN=)yc~TmB9yfu9bt?soLN%GaR2Oll8zqUyzR#0 zuplHgN63C){c&kFHVjgx~09tOY_$4HAW~pZg4fhFAwkfi{p>` zvnHrX}H}oVPj|Q_Ta{?z4~tcF;?HHKkn|Vkrln7Z;qjWmNvH5YCoKx=E-9> zPOE1}!Hwf;xERANlT|*Bqcb<2yt#^NMr3kLc5#yS!RIrPY&w zdyh|#k5}EJG=6jS{49-6YIH{r?;nFL@S8~rZ^ZPd+&a36Tb(;$^{L)inYjD+Qyk@^ z;|b8l(bPN{ADx_}SflvF+*jLf{PE3;vtu}7lFp0Up0w=E9Z6S=rXLkDMyoOjs1!`7 z5TX;r@hpvJQ6exjhDgN`72K4Q)Qmn~96xpS%DFpPIl&#`>VDgzxMt2 zQqwCZX$+sOcz5#p^6ue5!SDluP}~FgdvSJN&@6BF5B3Ke!_kX_t^N-R62P1!*uweB zL4UZO-SK%^?hS{B+x<}i(chfe zE+>tKxL?6GJOG;hombW|d9?fD=yq?|AH96o+Zr4U`orb=Ta~*4;hp}y_1E0ZPxWrT zN^aWMUs!vse5`%6{&?Z}|2tUux#iux1Ke-x19YRk!M*-g@$Eqmo?fT$)7}35?cL$v z;Fa~k&WpRtFCPy22Q$ET0c5i`b=^S$jK>44C4df=H+Hx8_WQ$O`R!K!ZhvcC*3V|@ z%;9LT16OZP9~xQSz<0s@2Robi9@EulrtM8R4aTYUPEXu2b|9AB^{&pzAs%!X9tM!e4{74zG>a*|tN!GNA_Srlg%Iemqe`-2ZRqs#zhw7oOzWwii z`$P56RJUIL>U3zUFMjMZ*UONR>gT|7?{;kGZ=m@G$eZFyF z`9e3o&@dHT`L`Qum08z*r!mXG^x&iM@DcPw80&L1=`EVKTmVJhcIs;~WF!&E%6{q=^a-0}K18f%s1|EOsy zUVY(D8m6+=MZ;9~sjDxoKXcWE#S`z2p1C@!n11X*V^)Fs&X=#wD%*zDqyK1_%7wr9 zUsp|K^Q-#LpS*T;R$1#qqpPN}g;TwAa@ACJNvdDEXk1vn_Kiy0D@m+KM>a4Q- z*Is?Dx&hN=(^0K{`C9eA_xxKbB1QlJABzYC000000RIL6LPG)oZ)qW zs_Lrh>S{owqN<=E$OL4nsj5MMDhMd50>u>-6(XFY+#J1K4H