diff --git a/src/java/org/broadinstitute/dropseqrna/utils/ByteArrayWrapper.java b/src/java/org/broadinstitute/dropseqrna/utils/ByteArrayWrapper.java index 66497e1e..601d12b5 100644 --- a/src/java/org/broadinstitute/dropseqrna/utils/ByteArrayWrapper.java +++ b/src/java/org/broadinstitute/dropseqrna/utils/ByteArrayWrapper.java @@ -31,13 +31,4 @@ public byte[] getData() { return data; } - public static void main(String[] args) { - ByteArrayWrapper wrapper1 = new ByteArrayWrapper(new byte[]{1, 2, 3}); - ByteArrayWrapper wrapper2 = new ByteArrayWrapper(new byte[]{1, 2, 4}); - ByteArrayWrapper wrapper3 = new ByteArrayWrapper(new byte[]{1, 2, 3}); - - System.out.println(wrapper1.compareTo(wrapper2)); // Output: -1 (wrapper1 < wrapper2) - System.out.println(wrapper1.compareTo(wrapper3)); // Output: 0 (wrapper1 == wrapper3) - System.out.println(wrapper2.compareTo(wrapper1)); // Output: 1 (wrapper2 > wrapper1) - } } diff --git a/src/java/org/broadinstitute/dropseqrna/utils/CompareBAMTagValues.java b/src/java/org/broadinstitute/dropseqrna/utils/CompareBAMTagValues.java index 8a776d90..485a4eda 100644 --- a/src/java/org/broadinstitute/dropseqrna/utils/CompareBAMTagValues.java +++ b/src/java/org/broadinstitute/dropseqrna/utils/CompareBAMTagValues.java @@ -303,7 +303,7 @@ private List getTagValuesReportLine(ByteArrayWrapper key) { List tagValues; switch (mode) { case FIXED_LENGTH_COMPRESSION -> tagValues = DNACompressor.decompressList(key.getData(), this.tagLengths); - case VARYING_LENGTH_COMPRESSION -> tagValues = DNACompressorVaryingLengths.decompressList(key.getData()); + // case VARYING_LENGTH_COMPRESSION -> tagValues = DNACompressorVaryingLengths.decompressList(key.getData()); default -> throw new IllegalStateException("Unexpected compression mode: " + mode); } List result = new ArrayList<>(); @@ -528,10 +528,12 @@ private void saveTagValues (List tagValuesList, boolean tagValuesConcord throw new IllegalArgumentException("Tag values have inconsistent lengths, but fixed length compression is enabled. Tag values: " + tagValuesList); tagValuesByteArray.increment(new ByteArrayWrapper(tagValuesBytes)); } + /* case VARYING_LENGTH_COMPRESSION -> { byte[] tagValuesBytes = DNACompressorVaryingLengths.compressList(tagValuesList); tagValuesByteArray.increment(new ByteArrayWrapper(tagValuesBytes)); } + */ } } diff --git a/src/java/org/broadinstitute/dropseqrna/utils/DNACompressor.java b/src/java/org/broadinstitute/dropseqrna/utils/DNACompressor.java index 2891eb9a..bfb8c27c 100644 --- a/src/java/org/broadinstitute/dropseqrna/utils/DNACompressor.java +++ b/src/java/org/broadinstitute/dropseqrna/utils/DNACompressor.java @@ -219,46 +219,4 @@ public static Comparator getDecompressedComparator(int[] lengths) { return Integer.compare(listA.size(), listB.size()); }; } - - - public static void main(String[] args) { - // Example DNA sequences - - List dnaList = List.of("ACGT", "TTACG", "GCGTACGTAC"); - System.out.println("Original DNA List: " + dnaList); - - // Compress the DNA sequences - byte[] compressed = compressList(dnaList); - System.out.println("Compressed Data: " + java.util.Arrays.toString(compressed)); - - // Lengths of the original sequences - int[] lengths = dnaList.stream().mapToInt(String::length).toArray(); - - // Decompress back into the original DNA sequences - List decompressed = decompressList(compressed, lengths); - System.out.println("Decompressed DNA List: " + decompressed); - - // Verify that the decompressed sequences match the originals - System.out.println("Matches original: " + dnaList.equals(decompressed)); - - // additional tests with NULL values - List dnaList2 = Arrays.asList("ACGT", "TTACG", null, "GCGTACGTAC"); - System.out.println("Original DNA List: " + dnaList2); - - // Compress the DNA sequences - byte[] compressed2 = compressList(dnaList2); - System.out.println("Compressed Data: " + java.util.Arrays.toString(compressed2)); - - // Lengths of the original sequences are the same as the original. Null values have length 0. - lengths = dnaList2.stream().mapToInt(dna -> dna == null ? 0 : dna.length()).toArray(); - - // Decompress back into the original DNA sequences - List decompressed2 = decompressList(compressed2, lengths); - System.out.println("Decompressed DNA List: " + decompressed2); - - // Verify that the decompressed sequences match the originals - System.out.println("Matches original: " + dnaList2.equals(decompressed2)); - - - } } diff --git a/src/java/org/broadinstitute/dropseqrna/utils/DNACompressorVaryingLengths.java b/src/java/org/broadinstitute/dropseqrna/utils/DNACompressorVaryingLengths.java index 20753420..702eef8f 100644 --- a/src/java/org/broadinstitute/dropseqrna/utils/DNACompressorVaryingLengths.java +++ b/src/java/org/broadinstitute/dropseqrna/utils/DNACompressorVaryingLengths.java @@ -37,6 +37,7 @@ public class DNACompressorVaryingLengths { * @param dnaList A list of DNA strings to compress. Each string must contain only 'A', 'C', 'G', 'T'. * @return A byte array representing the compressed DNA sequences, including metadata for lengths. */ + /* public static byte[] compressList(List dnaList) { List compressedSequences = new ArrayList<>(); int totalSize = 4; // 4 bytes for storing the number of sequences @@ -59,13 +60,14 @@ public static byte[] compressList(List dnaList) { return buffer.array(); } - + */ /** * Decompresses a single byte array into a list of DNA strings. * * @param compressed The byte array containing compressed DNA sequences with metadata. * @return A list of decompressed DNA strings. */ + /* public static List decompressList(byte[] compressed) { ByteBuffer buffer = ByteBuffer.wrap(compressed); @@ -83,10 +85,12 @@ public static List decompressList(byte[] compressed) { return decompressed; } + */ /** * Compresses a single DNA string into a byte array. */ + /* public static byte[] compress(String dna) { int length = dna.length(); int remainder = length % 4; @@ -111,10 +115,11 @@ public static byte[] compress(String dna) { return compressed; } - + */ /** * Decompresses a single compressed DNA sequence back into a string. */ + /* public static String decompress(byte[] compressed) { int remainder = compressed[0] & 0xFF; int byteLength = compressed.length - 1; @@ -156,4 +161,5 @@ public static void main(String[] args) { // Verify that the decompressed sequences match the originals System.out.println("Matches original: " + dnaList.equals(decompressed)); } + */ } diff --git a/src/tests/java/org/broadinstitute/dropseqrna/utils/ByteArrayWrapperTest.java b/src/tests/java/org/broadinstitute/dropseqrna/utils/ByteArrayWrapperTest.java new file mode 100644 index 00000000..4b227d2d --- /dev/null +++ b/src/tests/java/org/broadinstitute/dropseqrna/utils/ByteArrayWrapperTest.java @@ -0,0 +1,60 @@ +package org.broadinstitute.dropseqrna.utils; + +import org.testng.annotations.Test; +import static org.testng.Assert.*; + +public class ByteArrayWrapperTest { + + @Test + public void testEquals() { + byte[] data1 = {1, 2, 3}; + byte[] data2 = {1, 2, 3}; + byte[] data3 = {4, 5, 6}; + + ByteArrayWrapper wrapper1 = new ByteArrayWrapper(data1); + ByteArrayWrapper wrapper2 = new ByteArrayWrapper(data2); + ByteArrayWrapper wrapper3 = new ByteArrayWrapper(data3); + + assertEquals(wrapper1, wrapper2); + assertNotEquals(wrapper1, wrapper3); + } + + @Test + public void testHashCode() { + byte[] data1 = {1, 2, 3}; + byte[] data2 = {1, 2, 3}; + byte[] data3 = {4, 5, 6}; + + ByteArrayWrapper wrapper1 = new ByteArrayWrapper(data1); + ByteArrayWrapper wrapper2 = new ByteArrayWrapper(data2); + ByteArrayWrapper wrapper3 = new ByteArrayWrapper(data3); + + assertEquals(wrapper1.hashCode(), wrapper2.hashCode()); + assertNotEquals(wrapper1.hashCode(), wrapper3.hashCode()); + } + + @Test + public void testCompareTo() { + byte[] data1 = {1, 2, 3}; + byte[] data2 = {1, 2, 3}; + byte[] data3 = {4, 5, 6}; + byte[] data4 = {1, 2, 4}; + + ByteArrayWrapper wrapper1 = new ByteArrayWrapper(data1); + ByteArrayWrapper wrapper2 = new ByteArrayWrapper(data2); + ByteArrayWrapper wrapper3 = new ByteArrayWrapper(data3); + ByteArrayWrapper wrapper4 = new ByteArrayWrapper(data4); + + assertEquals(wrapper1.compareTo(wrapper2), 0); + assertTrue(wrapper1.compareTo(wrapper3) < 0); + assertTrue(wrapper3.compareTo(wrapper1) > 0); + assertTrue(wrapper1.compareTo(wrapper4) < 0); + } + + @Test + public void testGetData() { + byte[] data = {1, 2, 3}; + ByteArrayWrapper wrapper = new ByteArrayWrapper(data); + assertEquals(wrapper.getData(), data); + } +} \ No newline at end of file diff --git a/src/tests/java/org/broadinstitute/dropseqrna/utils/CompareBAMTagValuesTest.java b/src/tests/java/org/broadinstitute/dropseqrna/utils/CompareBAMTagValuesTest.java index fbd8e98e..ea742dd2 100644 --- a/src/tests/java/org/broadinstitute/dropseqrna/utils/CompareBAMTagValuesTest.java +++ b/src/tests/java/org/broadinstitute/dropseqrna/utils/CompareBAMTagValuesTest.java @@ -1,14 +1,22 @@ package org.broadinstitute.dropseqrna.utils; +import htsjdk.samtools.SAMRecord; +import htsjdk.samtools.SAMRecordSetBuilder; import htsjdk.samtools.util.Log; import org.testng.Assert; +import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; import picard.nio.PicardHtsPath; import java.io.File; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.List; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; public class CompareBAMTagValuesTest { @@ -34,12 +42,16 @@ public void testCompareBAMTagValuesUnpairedReads() { c.TAGS_2 = Arrays.asList("CB", "CR"); c.BAM_OUTPUT_1 = TestUtils.getTempReportFile("compare_starsolo_cellranger_1.", ".bam"); c.BAM_OUTPUT_2 = TestUtils.getTempReportFile("compare_starsolo_cellranger_2.", ".bam"); + c.BAM_UNIQUE_1 = TestUtils.getTempReportFile("starsolo_unique.", ".bam"); + c.BAM_UNIQUE_2 = TestUtils.getTempReportFile("cell_ranger_unique.", ".bam"); c.TAG_VALUES_OUTPUT = TestUtils.getTempReportFile("compare_starsolo_cellranger", ".report"); c.READ_COUNT_OUTPUT = TestUtils.getTempReportFile("compare_starsolo_cellranger", ".read_count"); c.useFixedLengthBaseCompression = true; c.STRICT = false; c.BAM_OUTPUT_1.deleteOnExit(); c.BAM_OUTPUT_2.deleteOnExit(); + c.BAM_UNIQUE_1.deleteOnExit(); + c.BAM_UNIQUE_2.deleteOnExit(); c.TAG_VALUES_OUTPUT.deleteOnExit(); c.READ_COUNT_OUTPUT.deleteOnExit(); int result = c.doWork(); @@ -99,4 +111,89 @@ public void testCompareBAMTagValuesPairedReads() { Assert.assertTrue(outputSame); } + + + /** + * READ_NAME_COMPARATOR TESTS + * (The things we do for code coverage...) + */ + + private SAMRecordSetBuilder builder; + private List records; + + @BeforeMethod + public void setUp() { + builder = new SAMRecordSetBuilder(); + records = new ArrayList<>(); + } + + @Test + public void testPairedReadOrderComparator_FirstOfPair() { + builder.addUnmappedFragment("read1_1"); + builder.addUnmappedFragment("read1_2"); + builder.iterator().forEachRemaining(records::add); + + SAMRecord read1 = records.get(0); + read1.setReadPairedFlag(true); + read1.setFirstOfPairFlag(true); + + SAMRecord read2 = records.get(1); + read2.setReadPairedFlag(true); + read2.setSecondOfPairFlag(true); + + int result = CompareBAMTagValues.PAIRED_READ_ORDER_COMPARATOR.compare(read1, read2); + assertTrue(result < 0, "First of pair should come before second of pair"); + } + + @Test + public void testPairedReadOrderComparator_SecondOfPair() { + builder.addUnmappedFragment("read1_1"); + builder.addUnmappedFragment("read1_2"); + builder.iterator().forEachRemaining(records::add); + + SAMRecord read1 = records.get(0); + read1.setReadPairedFlag(true); + read1.setSecondOfPairFlag(true); + + SAMRecord read2 = records.get(1); + read2.setReadPairedFlag(true); + read2.setFirstOfPairFlag(true); + + int result = CompareBAMTagValues.PAIRED_READ_ORDER_COMPARATOR.compare(read1, read2); + assertTrue(result > 0, "Second of pair should come after first of pair"); + } + + @Test + public void testPairedReadOrderComparator_UnpairedRead() { + builder.addUnmappedFragment("read1_1"); + builder.addUnmappedFragment("read1_2"); + builder.iterator().forEachRemaining(records::add); + + SAMRecord read1 = records.get(0); + read1.setReadPairedFlag(false); + + SAMRecord read2 = records.get(1); + read2.setReadPairedFlag(true); + read2.setFirstOfPairFlag(true); + + int result = CompareBAMTagValues.PAIRED_READ_ORDER_COMPARATOR.compare(read1, read2); + assertTrue(result > 0, "Unpaired read should come after paired read"); + } + + @Test + public void testPairedReadOrderComparator_BothUnpaired() { + builder.addUnmappedFragment("read1_1"); + builder.addUnmappedFragment("read2_1"); + builder.iterator().forEachRemaining(records::add); + + SAMRecord read1 = records.get(0); + read1.setReadPairedFlag(false); + + SAMRecord read2 = records.get(1); + read2.setReadPairedFlag(false); + + int result = CompareBAMTagValues.PAIRED_READ_ORDER_COMPARATOR.compare(read1, read2); + assertEquals(result, 0, "Both unpaired reads should be considered equal"); + } + } diff --git a/src/tests/java/org/broadinstitute/dropseqrna/utils/DNACompressorTest.java b/src/tests/java/org/broadinstitute/dropseqrna/utils/DNACompressorTest.java index f65b6b9d..a32f9698 100644 --- a/src/tests/java/org/broadinstitute/dropseqrna/utils/DNACompressorTest.java +++ b/src/tests/java/org/broadinstitute/dropseqrna/utils/DNACompressorTest.java @@ -4,6 +4,7 @@ import org.testng.annotations.Test; import java.util.Arrays; +import java.util.Comparator; import java.util.List; import static org.testng.Assert.*; @@ -32,6 +33,30 @@ public void testCompressRoundTrip(String dna) { assertEquals(result, dna); } + @Test + public void testGetDecompressedComparator() { + // Sample DNA sequences + List dnaList1 = Arrays.asList("ACGT", "TTACG", "GCGTACGTAC"); + List dnaList2 = Arrays.asList("ACGT", "TTACG", "GCGTACGTAC"); + List dnaList3 = Arrays.asList("ACGT", "TTACG", "GCGTANGTAC"); + + // Compress the DNA sequences + byte[] compressed1 = DNACompressor.compressList(dnaList1); + byte[] compressed2 = DNACompressor.compressList(dnaList2); + byte[] compressed3 = DNACompressor.compressList(dnaList3); + + // Lengths of the original sequences + int[] lengths = dnaList1.stream().mapToInt(String::length).toArray(); + + // Get the comparator + Comparator comparator = DNACompressor.getDecompressedComparator(lengths); + + // Compare the compressed sequences + assertEquals(comparator.compare(compressed1, compressed2), 0); + assertTrue(comparator.compare(compressed1, compressed3) < 0); + assertTrue(comparator.compare(compressed3, compressed1) > 0); + } + @DataProvider(name = "dnaStrings") diff --git a/src/tests/java/org/broadinstitute/dropseqrna/utils/readiterators/BAMTagCleanupIteratorTest.java b/src/tests/java/org/broadinstitute/dropseqrna/utils/readiterators/BAMTagCleanupIteratorTest.java new file mode 100644 index 00000000..554e5b6d --- /dev/null +++ b/src/tests/java/org/broadinstitute/dropseqrna/utils/readiterators/BAMTagCleanupIteratorTest.java @@ -0,0 +1,97 @@ +package org.broadinstitute.dropseqrna.utils.readiterators; + +import htsjdk.samtools.SAMRecord; +import htsjdk.samtools.SAMRecordSetBuilder; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.regex.Pattern; + +import static org.testng.Assert.*; + +public class BAMTagCleanupIteratorTest { + + private List records; + private Iterator underlyingIterator; + + @BeforeMethod + public void setUp() { + SAMRecordSetBuilder builder = new SAMRecordSetBuilder(); + builder.addUnmappedFragment("read1"); + builder.addUnmappedFragment("read2"); + records = new ArrayList<>(); + builder.iterator().forEachRemaining(records::add); + } + + @Test + public void testPrefixRemoval() { + records.forEach(record -> record.setAttribute("XT", "prefix_value")); + underlyingIterator = records.iterator(); + BAMTagCleanupIterator iterator = new BAMTagCleanupIterator.Builder(underlyingIterator) + .tag("XT") + .prefixToRemove("prefix_") + .build(); + + SAMRecord result = iterator.next(); + assertEquals(result.getStringAttribute("XT"), "value"); + } + + @Test + public void testSuffixRemoval() { + records.forEach(record -> record.setAttribute("XT", "value_suffix")); + underlyingIterator = records.iterator(); + BAMTagCleanupIterator iterator = new BAMTagCleanupIterator.Builder(underlyingIterator) + .tag("XT") + .suffixToRemove("_suffix") + .build(); + + SAMRecord result = iterator.next(); + assertEquals(result.getStringAttribute("XT"), "value"); + } + + @Test + public void testPatternRemoval() { + records.forEach(record -> record.setAttribute("XT", "value_to_remove")); + underlyingIterator = records.iterator(); + BAMTagCleanupIterator iterator = new BAMTagCleanupIterator.Builder(underlyingIterator) + .tag("XT") + .patternToRemove(Pattern.compile("_to_remove")) + .build(); + + SAMRecord result = iterator.next(); + assertEquals(result.getStringAttribute("XT"), "value"); + } + + @Test + public void testPrefixAndSuffixAddition() { + records.forEach(record -> record.setAttribute("XT", "value")); + underlyingIterator = records.iterator(); + BAMTagCleanupIterator iterator = new BAMTagCleanupIterator.Builder(underlyingIterator) + .tag("XT") + .prefixToAdd("prefix_") + .suffixToAdd("_suffix") + .build(); + + SAMRecord result = iterator.next(); + assertEquals(result.getStringAttribute("XT"), "prefix_value_suffix"); + } + + @Test + public void testCombinedTransformations() { + records.forEach(record -> record.setAttribute("XT", "prefix_value_suffix")); + underlyingIterator = records.iterator(); + BAMTagCleanupIterator iterator = new BAMTagCleanupIterator.Builder(underlyingIterator) + .tag("XT") + .prefixToRemove("prefix_") + .suffixToRemove("_suffix") + .prefixToAdd("new_prefix_") + .suffixToAdd("_new_suffix") + .build(); + + SAMRecord result = iterator.next(); + assertEquals(result.getStringAttribute("XT"), "new_prefix_value_new_suffix"); + } +} \ No newline at end of file diff --git a/src/tests/java/org/broadinstitute/dropseqrna/utils/readiterators/ReadNameCleanupIteratorTest.java b/src/tests/java/org/broadinstitute/dropseqrna/utils/readiterators/ReadNameCleanupIteratorTest.java new file mode 100644 index 00000000..47b7aa15 --- /dev/null +++ b/src/tests/java/org/broadinstitute/dropseqrna/utils/readiterators/ReadNameCleanupIteratorTest.java @@ -0,0 +1,87 @@ +package org.broadinstitute.dropseqrna.utils.readiterators; + +import htsjdk.samtools.SAMRecord; +import htsjdk.samtools.SAMRecordSetBuilder; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.regex.Pattern; + +import static org.testng.Assert.*; + +public class ReadNameCleanupIteratorTest { + + private List records; + private Iterator underlyingIterator; + + @BeforeMethod + public void setUp() { + SAMRecordSetBuilder builder = new SAMRecordSetBuilder(); + builder.addUnmappedFragment("prefix_read1_suffix"); + builder.addUnmappedFragment("prefix_read2_suffix"); + records = new ArrayList<>(); + builder.iterator().forEachRemaining(records::add); + } + + @Test + public void testPrefixRemoval() { + underlyingIterator = records.iterator(); + ReadNameCleanupIterator iterator = new ReadNameCleanupIterator.Builder(underlyingIterator) + .prefixToRemove("prefix_") + .build(); + + SAMRecord result = iterator.next(); + assertEquals(result.getReadName(), "read1_suffix"); + } + + @Test + public void testSuffixRemoval() { + underlyingIterator = records.iterator(); + ReadNameCleanupIterator iterator = new ReadNameCleanupIterator.Builder(underlyingIterator) + .suffixToRemove("_suffix") + .build(); + + SAMRecord result = iterator.next(); + assertEquals(result.getReadName(), "prefix_read1"); + } + + @Test + public void testPatternRemoval() { + underlyingIterator = records.iterator(); + ReadNameCleanupIterator iterator = new ReadNameCleanupIterator.Builder(underlyingIterator) + .patternToRemove(Pattern.compile("_read1_")) + .build(); + + SAMRecord result = iterator.next(); + assertEquals(result.getReadName(), "prefixsuffix"); + } + + @Test + public void testPrefixAndSuffixAddition() { + underlyingIterator = records.iterator(); + ReadNameCleanupIterator iterator = new ReadNameCleanupIterator.Builder(underlyingIterator) + .prefixToAdd("new_prefix_") + .suffixToAdd("_new_suffix") + .build(); + + SAMRecord result = iterator.next(); + assertEquals(result.getReadName(), "new_prefix_prefix_read1_suffix_new_suffix"); + } + + @Test + public void testCombinedTransformations() { + underlyingIterator = records.iterator(); + ReadNameCleanupIterator iterator = new ReadNameCleanupIterator.Builder(underlyingIterator) + .prefixToRemove("prefix_") + .suffixToRemove("_suffix") + .prefixToAdd("new_prefix_") + .suffixToAdd("_new_suffix") + .build(); + + SAMRecord result = iterator.next(); + assertEquals(result.getReadName(), "new_prefix_read1_new_suffix"); + } +} \ No newline at end of file