diff --git a/src/main/java/picard/sam/BuildBamIndex.java b/src/main/java/picard/sam/BuildBamIndex.java index 89f6ed4100..4a1224d675 100755 --- a/src/main/java/picard/sam/BuildBamIndex.java +++ b/src/main/java/picard/sam/BuildBamIndex.java @@ -25,24 +25,32 @@ package picard.sam; import htsjdk.samtools.BAMIndexer; +import htsjdk.samtools.CRAMCRAIIndexer; +import htsjdk.samtools.CRAMIndexer; import htsjdk.samtools.SAMException; import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SamInputResource; import htsjdk.samtools.SamReader; import htsjdk.samtools.SamReaderFactory; +import htsjdk.samtools.seekablestream.SeekablePathStream; +import htsjdk.samtools.seekablestream.SeekableStream; import htsjdk.samtools.util.CloserUtil; import htsjdk.samtools.util.FileExtensions; import htsjdk.samtools.util.IOUtil; import htsjdk.samtools.util.Log; +import htsjdk.utils.ValidationUtils; import org.broadinstitute.barclay.argparser.Argument; import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; import org.broadinstitute.barclay.help.DocumentedFeature; +import picard.PicardException; import picard.cmdline.CommandLineProgram; import picard.cmdline.StandardOptionDefinitions; import picard.cmdline.programgroups.ReadDataManipulationProgramGroup; import picard.nio.PicardHtsPath; import java.io.File; +import java.io.IOException; +import java.nio.file.Files; import java.nio.file.Path; /** @@ -69,7 +77,7 @@ public class BuildBamIndex extends CommandLineProgram { private static final Log log = Log.getInstance(BuildBamIndex.class); @Argument(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME, - doc = "A BAM file or GA4GH URL to process. Must be sorted in coordinate order.") + doc = "A BAM file or GA4GH URL to process. Must be sorted in coordinate order.") // tsato: Is the GA4GH bit accurate? public PicardHtsPath INPUT; @Argument(shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME, @@ -83,41 +91,36 @@ public class BuildBamIndex extends CommandLineProgram { * all the records generating a BAM Index, then writes the bai file. */ protected int doWork() { - final Path inputPath = INPUT.toPath(); - - // set default output file - input-file.bai - if (OUTPUT == null) { - - final String baseFileName = inputPath.getFileName().toString(); - - // only BAI indices can be created for now, although CSI indices can be read as well - if (INPUT.hasExtension(FileExtensions.BAM)) { - - final int index = baseFileName.lastIndexOf('.'); - final String outputIndexFileName = baseFileName.substring(0, index) + FileExtensions.BAI_INDEX; - OUTPUT = new PicardHtsPath(inputPath.resolveSibling(outputIndexFileName).toUri().toString()); - } else { - OUTPUT = new PicardHtsPath(inputPath.resolveSibling(baseFileName + FileExtensions.BAI_INDEX).toUri().toString()); - } - } + ValidationUtils.validateArg(INPUT.hasExtension(FileExtensions.BAM) || INPUT.hasExtension(FileExtensions.CRAM), + "only BAM and CRAM files are supported. INPUT = " + INPUT); final SamReader bam; - bam = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE) + bam = SamReaderFactory.makeDefault().referenceSequence(referenceSequence.getReferencePath()) .disable(SamReaderFactory.Option.EAGERLY_DECODE) .enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS) - .open(SamInputResource.of(inputPath)); + .open(SamInputResource.of(INPUT.toPath())); + ValidationUtils.validateArg(bam.getFileHeader().getSortOrder().equals(SAMFileHeader.SortOrder.coordinate), + "Input bam file must be sorted by coordinates"); - if (bam.type() != SamReader.Type.BAM_TYPE && bam.type() != SamReader.Type.BAM_CSI_TYPE) { - throw new SAMException("Input file must be bam file, not sam file."); + // set default output file - .bai or .crai + if (OUTPUT == null) { + final String extension = INPUT.hasExtension(FileExtensions.BAM) ? FileExtensions.BAI_INDEX : FileExtensions.CRAM_INDEX; + OUTPUT = PicardHtsPath.replaceExtension(INPUT, extension); } - if (!bam.getFileHeader().getSortOrder().equals(SAMFileHeader.SortOrder.coordinate)) { - throw new SAMException("Input bam file must be sorted by coordinate"); + if (bam.type() == SamReader.Type.BAM_TYPE || bam.type() == SamReader.Type.BAM_CSI_TYPE){ + BAMIndexer.createIndex(bam, OUTPUT.toPath()); + } else if (bam.type() == SamReader.Type.CRAM_TYPE){ + try (SeekableStream seekableStream = new SeekablePathStream(INPUT.toPath())){ + CRAMCRAIIndexer.writeIndex(seekableStream, Files.newOutputStream(OUTPUT.toPath())); + } catch (IOException e){ + throw new SAMException("Unable to write the CRAM Index " + OUTPUT); + } + } else { + throw new PicardException("Unsupported file type: " + INPUT); } - BAMIndexer.createIndex(bam, OUTPUT.toPath()); - - log.info("Successfully wrote bam index file " + OUTPUT); + log.info("Successfully wrote bam index file " + OUTPUT); // tsato: bam -> SAM CloserUtil.close(bam); return 0; } diff --git a/src/test/java/picard/sam/BuildBamIndexTest.java b/src/test/java/picard/sam/BuildBamIndexTest.java index 586e8bf5ab..1e07b63a86 100644 --- a/src/test/java/picard/sam/BuildBamIndexTest.java +++ b/src/test/java/picard/sam/BuildBamIndexTest.java @@ -3,7 +3,6 @@ import htsjdk.beta.io.IOPathUtils; import htsjdk.io.IOPath; import htsjdk.samtools.SAMException; -import org.apache.commons.io.FileUtils; import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; @@ -30,7 +29,7 @@ public class BuildBamIndexTest extends CommandLineProgramTest { private static final PicardHtsPath INPUT_UNSORTED_SAM_CLOUD = new PicardHtsPath(CLOUD_TEST_DATA_DIR, "index_test.sam"); // tsato: replace with variables defined in the other branches once they merge - private static final PicardHtsPath CLOUD_SORTED_CRAM_CLOUD = new PicardHtsPath("gs://hellbender/test/resources/picard/BuildBamIndex/CEUTrio.HiSeq.WGS.b37.NA12878.20.21_n100.cram"); + private static final PicardHtsPath SORTED_CRAM_CLOUD = new PicardHtsPath("gs://hellbender/test/resources/picard/BuildBamIndex/CEUTrio.HiSeq.WGS.b37.NA12878.20.21_n100.cram"); public String getCommandLineProgramName() { return BuildBamIndex.class.getSimpleName(); } @@ -100,23 +99,26 @@ public void testBuildBamIndexFail() { @Test() public void testCloudCram() throws IOException { final List args = new ArrayList<>(); - args.add("INPUT=" + CLOUD_SORTED_CRAM_CLOUD); + args.add("INPUT=" + SORTED_CRAM_CLOUD); args.add("REFERENCE_SEQUENCE=" + "gs://hellbender/test/resources/picard/references/human_g1k_v37.20.21.fasta"); // tsato: replace with variable - final boolean specifyOutput = true; // for now + final boolean specifyOutput = false; // for now + final PicardHtsPath indexOutput; final String prefix = "BuildBamIndex_cram_test"; if (specifyOutput) { indexOutput = PicardBucketUtils.getTempFilePath(CLOUD_TEST_OUTPUT_DIR, prefix, ".crai"); args.add("OUTPUT=" + indexOutput); } else { - indexOutput = PicardHtsPath.replaceExtension(CLOUD_SORTED_CRAM_CLOUD, ".crai", false); - PicardIOUtils.deleteOnExit(indexOutput.toPath()); + indexOutput = PicardHtsPath.replaceExtension(SORTED_CRAM_CLOUD, ".crai", false); + // tsato: temporarily disable while investigating cram index created this way + // PicardIOUtils.deleteOnExit(indexOutput.toPath()); } runPicardCommandLine(args); - // tsato: gotta fix the suffix of this file... - final PicardHtsPath expectedCramIndex = new PicardHtsPath("gs://hellbender/test/resources/picard/bam/CEUTrio.HiSeq.WGS.b37.NA12878.20.21_n100.cram.bai"); - Assert.assertEquals(Files.readAllBytes(indexOutput.toPath()), Files.readAllBytes(expectedCramIndex.toPath())); + // tsato: gotta fix the suffix of this file...or not? + final PicardHtsPath expectedCramIndex = new PicardHtsPath("gs://hellbender/test/resources/picard/bam/CEUTrio.HiSeq.WGS.b37.NA12878.20.21_n10000.cram.bai"); + int d = 3; + // Assert.assertEquals(Files.readAllBytes(indexOutput.toPath()), Files.readAllBytes(new File("/Users/tsato/workspace/picard/testdata/picard/test.index").toPath())); } } diff --git a/testdata/picard/reference/human_g1k_v37.20.21.dict b/testdata/picard/reference/human_g1k_v37.20.21.dict new file mode 100644 index 0000000000..7421f79098 --- /dev/null +++ b/testdata/picard/reference/human_g1k_v37.20.21.dict @@ -0,0 +1,3 @@ +@HD VN:1.5 SO:unsorted +@SQ SN:20 LN:63025520 M5:0dec9660ec1efaaf33281c0d5ea2560f UR:file:/Users/droazen/src/hellbender/src/test/resources/large/human_g1k_v37.20.21.fasta +@SQ SN:21 LN:48129895 M5:2979a6085bfe28e3ad6f552f361ed74d UR:file:/Users/droazen/src/hellbender/src/test/resources/large/human_g1k_v37.20.21.fasta