From aa36be93a32697b1b1eb74ecfc54d4129249da08 Mon Sep 17 00:00:00 2001 From: Alec Wysoker Date: Mon, 3 Jun 2024 10:28:50 -0400 Subject: [PATCH] Fix for #424: move STARSoloChimericReadFilteringIterator down so reads without UMIs don't reach it. --- .../SNPUMIBasePileupIterator.java | 8 +++---- .../SNPUMIBasePileupIteratorTest.java | 21 ++++++++++++++++++- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/src/java/org/broadinstitute/dropseqrna/barnyard/digitalallelecounts/SNPUMIBasePileupIterator.java b/src/java/org/broadinstitute/dropseqrna/barnyard/digitalallelecounts/SNPUMIBasePileupIterator.java index 601e52b9..1c3c71b5 100644 --- a/src/java/org/broadinstitute/dropseqrna/barnyard/digitalallelecounts/SNPUMIBasePileupIterator.java +++ b/src/java/org/broadinstitute/dropseqrna/barnyard/digitalallelecounts/SNPUMIBasePileupIterator.java @@ -98,11 +98,10 @@ public SNPUMIBasePileupIterator (final SamHeaderAndIterator headerAndIter, final // add a progress logger. final ProgressLoggingIterator loggingIterator = new ProgressLoggingIterator (headerAndIter.iterator, logger); - final STARSoloChimericReadFilteringIterator chimericReadFilteringIterator = new STARSoloChimericReadFilteringIterator(loggingIterator, molecularBarcodeTag); // filter out to contigs in the interval list. Set contigsToFilter = snpIntervals.getIntervals().stream().map(x -> x.getContig()).collect(Collectors.toSet()); - ChromosomeFilteringIterator cfi = new ChromosomeFilteringIterator(chimericReadFilteringIterator, contigsToFilter, false); + ChromosomeFilteringIterator cfi = new ChromosomeFilteringIterator(loggingIterator, contigsToFilter, false); // Filter reads on map quality MapQualityFilteredIterator filteringIterator = new MapQualityFilteredIterator(cfi, readMQ, true); @@ -110,9 +109,10 @@ public SNPUMIBasePileupIterator (final SamHeaderAndIterator headerAndIter, final // Filter records before sorting, to reduce I/O final MissingTagFilteringIterator filteringIterator2 = new MissingTagFilteringIterator(filteringIterator, geneTag, cellBarcodeTag, molecularBarcodeTag); - + + final STARSoloChimericReadFilteringIterator chimericReadFilteringIterator = new STARSoloChimericReadFilteringIterator(filteringIterator2, molecularBarcodeTag); // Filter/assign reads based on functional annotations - GeneFunctionIteratorWrapper gfteratorWrapper = new GeneFunctionIteratorWrapper(filteringIterator2, geneTag, geneStrandTag, geneFunctionTag, assignReadsToAllGenes, strandStrategy, acceptedLociFunctions, functionStrategy); + GeneFunctionIteratorWrapper gfteratorWrapper = new GeneFunctionIteratorWrapper(chimericReadFilteringIterator, geneTag, geneStrandTag, geneFunctionTag, assignReadsToAllGenes, strandStrategy, acceptedLociFunctions, functionStrategy); /** * In this section, data is sorted into the standard cell / gene / umi order and grouped so that all reads for a single UMI can be evaluated at once. diff --git a/src/tests/java/org/broadinstitute/dropseqrna/barnyard/digitalallelecounts/SNPUMIBasePileupIteratorTest.java b/src/tests/java/org/broadinstitute/dropseqrna/barnyard/digitalallelecounts/SNPUMIBasePileupIteratorTest.java index d466bd5b..8a1947c5 100644 --- a/src/tests/java/org/broadinstitute/dropseqrna/barnyard/digitalallelecounts/SNPUMIBasePileupIteratorTest.java +++ b/src/tests/java/org/broadinstitute/dropseqrna/barnyard/digitalallelecounts/SNPUMIBasePileupIteratorTest.java @@ -89,7 +89,26 @@ public void testPileups() { } Assert.assertEquals(expectedPileups, count); } - + + @Test + public void testMissingUMITag() { + List cellBarcodes = ParseBarcodeFile.readCellBarcodeFile(cellBCFile); + IntervalList snpIntervals = IntervalList.fromFile(snpIntervalsFile); + + // all SNPs equal quality. No SNPs overlap on UMIs so all pileups are seen. + Map meanGenotypeQuality = new HashMap(); + for (Interval i: snpIntervals) { + meanGenotypeQuality.put(i, 30d); + } + + SNPUMIBasePileupIterator sbpi = new SNPUMIBasePileupIterator( + new SamHeaderAndIterator(smallBAMFile), snpIntervals, GENE_NAME_TAG, GENE_STRAND_TAG, GENE_FUNCTION_TAG, + LOCUS_FUNCTION_LIST, STRAND_STRATEGY, GeneFunctionCommandLineBase.DEFAULT_FUNCTIONAL_STRATEGY, cellBarcodeTag, + "XQ", snpTag, functionTag, readMQ, assignReadsToAllGenes, cellBarcodes, meanGenotypeQuality, + SortOrder.SNP_GENE); + Assert.assertFalse(sbpi.hasNext()); + + } // for reads that have more than 1 SNP in the data, filter the SNPs tagging the read to a single SNP. // This means the read only contributes to a single pileup.