Skip to content

Commit

Permalink
Fix for #424: move STARSoloChimericReadFilteringIterator down
Browse files Browse the repository at this point in the history
so reads without UMIs don't reach it.
  • Loading branch information
alecw committed Jun 3, 2024
1 parent 8c54d8b commit aa36be9
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -98,21 +98,21 @@ public SNPUMIBasePileupIterator (final SamHeaderAndIterator headerAndIter, final

// add a progress logger.
final ProgressLoggingIterator loggingIterator = new ProgressLoggingIterator (headerAndIter.iterator, logger);
final STARSoloChimericReadFilteringIterator chimericReadFilteringIterator = new STARSoloChimericReadFilteringIterator(loggingIterator, molecularBarcodeTag);

// filter out to contigs in the interval list.
Set<String> contigsToFilter = snpIntervals.getIntervals().stream().map(x -> x.getContig()).collect(Collectors.toSet());
ChromosomeFilteringIterator cfi = new ChromosomeFilteringIterator(chimericReadFilteringIterator, contigsToFilter, false);
ChromosomeFilteringIterator cfi = new ChromosomeFilteringIterator(loggingIterator, contigsToFilter, false);

// Filter reads on map quality
MapQualityFilteredIterator filteringIterator = new MapQualityFilteredIterator(cfi, readMQ, true);

// Filter records before sorting, to reduce I/O
final MissingTagFilteringIterator filteringIterator2 =
new MissingTagFilteringIterator(filteringIterator, geneTag, cellBarcodeTag, molecularBarcodeTag);


final STARSoloChimericReadFilteringIterator chimericReadFilteringIterator = new STARSoloChimericReadFilteringIterator(filteringIterator2, molecularBarcodeTag);
// Filter/assign reads based on functional annotations
GeneFunctionIteratorWrapper gfteratorWrapper = new GeneFunctionIteratorWrapper(filteringIterator2, geneTag, geneStrandTag, geneFunctionTag, assignReadsToAllGenes, strandStrategy, acceptedLociFunctions, functionStrategy);
GeneFunctionIteratorWrapper gfteratorWrapper = new GeneFunctionIteratorWrapper(chimericReadFilteringIterator, geneTag, geneStrandTag, geneFunctionTag, assignReadsToAllGenes, strandStrategy, acceptedLociFunctions, functionStrategy);

/**
* In this section, data is sorted into the standard cell / gene / umi order and grouped so that all reads for a single UMI can be evaluated at once.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,26 @@ public void testPileups() {
}
Assert.assertEquals(expectedPileups, count);
}


@Test
public void testMissingUMITag() {
List<String> cellBarcodes = ParseBarcodeFile.readCellBarcodeFile(cellBCFile);
IntervalList snpIntervals = IntervalList.fromFile(snpIntervalsFile);

// all SNPs equal quality. No SNPs overlap on UMIs so all pileups are seen.
Map<Interval, Double> meanGenotypeQuality = new HashMap<Interval, Double>();
for (Interval i: snpIntervals) {
meanGenotypeQuality.put(i, 30d);
}

SNPUMIBasePileupIterator sbpi = new SNPUMIBasePileupIterator(
new SamHeaderAndIterator(smallBAMFile), snpIntervals, GENE_NAME_TAG, GENE_STRAND_TAG, GENE_FUNCTION_TAG,
LOCUS_FUNCTION_LIST, STRAND_STRATEGY, GeneFunctionCommandLineBase.DEFAULT_FUNCTIONAL_STRATEGY, cellBarcodeTag,
"XQ", snpTag, functionTag, readMQ, assignReadsToAllGenes, cellBarcodes, meanGenotypeQuality,
SortOrder.SNP_GENE);
Assert.assertFalse(sbpi.hasNext());

}

// for reads that have more than 1 SNP in the data, filter the SNPs tagging the read to a single SNP.
// This means the read only contributes to a single pileup.
Expand Down

0 comments on commit aa36be9

Please sign in to comment.