From fc248dfc103e5e7130bbfcdd331be0a233136de5 Mon Sep 17 00:00:00 2001
From: Mark Walker
Date: Tue, 3 Dec 2024 13:45:14 -0500
Subject: [PATCH 1/5] Prioritize het calls when merging clustered SVs (#9058)

---
 .../sv/cluster/CanonicalSVCollapser.java      |  31 ++-
 .../cluster/CanonicalSVCollapserUnitTest.java | 185 ++++++++++++++----
 .../walkers/sv/SVClusterIntegrationTest.java  |   6 +-
 3 files changed, 174 insertions(+), 48 deletions(-)

diff --git a/src/main/java/org/broadinstitute/hellbender/tools/sv/cluster/CanonicalSVCollapser.java b/src/main/java/org/broadinstitute/hellbender/tools/sv/cluster/CanonicalSVCollapser.java
index 8a1f68567a5..ee6e140b793 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/sv/cluster/CanonicalSVCollapser.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/sv/cluster/CanonicalSVCollapser.java
@@ -122,30 +122,35 @@ public enum FlagFieldLogic {
     /**
      * Comparators used for picking the representative genotype for a given sample
      */
+    // Prioritize non-ref over ref
     final Comparator<Genotype> genotypeIsNonRefComparator = (o1, o2) -> {
         final long count1 = Math.min(1, o1.getAlleles().stream().filter(Allele::isNonReference).filter(Allele::isCalled).count());
         final long count2 = Math.min(1, o2.getAlleles().stream().filter(Allele::isNonReference).filter(Allele::isCalled).count());
         return Long.compare(count1, count2);
     };
 
+    // Prioritize fewer ALT alleles over more. When applied after non-ref comparator, hom-ref genotypes will not be encountered.
     final Comparator<Genotype> genotypeNonRefCountComparator = (o1, o2) -> {
         final long count1 = o1.getAlleles().stream().filter(Allele::isNonReference).filter(Allele::isCalled).count();
         final long count2 = o2.getAlleles().stream().filter(Allele::isNonReference).filter(Allele::isCalled).count();
-        return Long.compare(count1, count2);
+        return Long.compare(count2, count1);
     };
 
+    // Prioritize called genotypes
     final Comparator<Genotype> genotypeCalledComparator = (o1, o2) -> {
         final long count1 = o1.getAlleles().stream().filter(Allele::isCalled).count();
         final long count2 = o2.getAlleles().stream().filter(Allele::isCalled).count();
         return Long.compare(count1, count2);
     };
 
+    // Prioritize higher quality
     final Comparator<Genotype> genotypeQualityComparator = (o1, o2) -> {
         final int quality1 = VariantContextGetters.getAttributeAsInt(o1, VCFConstants.GENOTYPE_QUALITY_KEY, 0);
         final int quality2 = VariantContextGetters.getAttributeAsInt(o2, VCFConstants.GENOTYPE_QUALITY_KEY, 0);
         return Integer.compare(quality1, quality2);
     };
 
+    // Prioritize higher depth genotyping quality
     final Comparator<Genotype> genotypeCopyNumberQualityComparator = new Comparator<Genotype>() {
         @Override
         public int compare(Genotype o1, Genotype o2) {
@@ -155,6 +160,7 @@ public int compare(Genotype o1, Genotype o2) {
         }
     };
 
+    // Prioritize depth genotypes closer to reference
     final Comparator<Genotype> genotypeCopyNumberComparator = new Comparator<Genotype>() {
         @Override
         public int compare(Genotype o1, Genotype o2) {
@@ -162,7 +168,25 @@ public int compare(Genotype o1, Genotype o2) {
             final int expectedQualityNumber1 = VariantContextGetters.getAttributeAsInt(o1, GATKSVVCFConstants.EXPECTED_COPY_NUMBER_FORMAT, 0);
             final int copyNumber1 = VariantContextGetters.getAttributeAsInt(o1, GATKSVVCFConstants.COPY_NUMBER_FORMAT, 0);
             final int expectedQualityNumber2 = VariantContextGetters.getAttributeAsInt(o2, GATKSVVCFConstants.EXPECTED_COPY_NUMBER_FORMAT, 0);
             final int copyNumber2 = VariantContextGetters.getAttributeAsInt(o2, GATKSVVCFConstants.COPY_NUMBER_FORMAT, 0);
-            return Double.compare(Math.abs(expectedQualityNumber1 - copyNumber1), Math.abs(expectedQualityNumber2 - copyNumber2));
+            return Double.compare(Math.abs(expectedQualityNumber2 - copyNumber2), Math.abs(expectedQualityNumber1 -
copyNumber1)); + } + }; + + // Priotize DEL over DUP as final tiebreaker + final Comparator genotypeDelOverDupComparator = new Comparator() { + @Override + public int compare(Genotype o1, Genotype o2) { + final int expectedCN1 = VariantContextGetters.getAttributeAsInt(o1, GATKSVVCFConstants.EXPECTED_COPY_NUMBER_FORMAT, 0); + final boolean isDel1 = VariantContextGetters.getAttributeAsInt(o1, GATKSVVCFConstants.COPY_NUMBER_FORMAT, expectedCN1) < expectedCN1; + final int expectedCN2 = VariantContextGetters.getAttributeAsInt(o2, GATKSVVCFConstants.EXPECTED_COPY_NUMBER_FORMAT, 0); + final boolean isDel2 = VariantContextGetters.getAttributeAsInt(o2, GATKSVVCFConstants.COPY_NUMBER_FORMAT, expectedCN2) < expectedCN2; + if (isDel1 && !isDel2) { + return 1; + } else if (isDel2 && !isDel1) { + return -1; + } else { + return 0; + } } }; @@ -461,7 +485,8 @@ protected Genotype getRepresentativeGenotype(final Collection genotype .thenComparing(genotypeQualityComparator) .thenComparing(genotypeNonRefCountComparator) .thenComparing(genotypeCopyNumberQualityComparator) - .thenComparing(genotypeCopyNumberComparator)).get(); + .thenComparing(genotypeCopyNumberComparator) + .thenComparing(genotypeDelOverDupComparator)).get(); } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/sv/cluster/CanonicalSVCollapserUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/sv/cluster/CanonicalSVCollapserUnitTest.java index 3a692ab4001..0b83beadb0b 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/sv/cluster/CanonicalSVCollapserUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/sv/cluster/CanonicalSVCollapserUnitTest.java @@ -646,23 +646,23 @@ public Object[][] collapseSampleGenotypesTestData() { }, // het preferred over hom ref even with lower gq { + "sample", + Lists.newArrayList( + Lists.newArrayList(Allele.REF_N, Allele.REF_N), + Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_INS) + ), + Lists.newArrayList( + createGenotypeTestAttributesWithGQ(2, 30), + createGenotypeTestAttributesWithGQ(2, 20) + ), + Allele.REF_N, + GenotypeBuilder.create( "sample", - Lists.newArrayList( - Lists.newArrayList(Allele.REF_N, Allele.REF_N), - Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_INS) - ), - Lists.newArrayList( - createGenotypeTestAttributesWithGQ(2, 30), - createGenotypeTestAttributesWithGQ(2, 20) - ), - Allele.REF_N, - GenotypeBuilder.create( - "sample", - Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_INS), - createGenotypeTestAttributesWithGQ(2, 20) - ) - }, - // hom var preferred over het + Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_INS), + createGenotypeTestAttributesWithGQ(2, 20) + ) + }, + // het preferred over hom-var { "sample", Lists.newArrayList( @@ -676,11 +676,11 @@ public Object[][] collapseSampleGenotypesTestData() { Allele.REF_N, GenotypeBuilder.create( "sample", - Lists.newArrayList(Allele.SV_SIMPLE_INS, Allele.SV_SIMPLE_INS), + Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_INS), createGenotypeTestAttributes(2) ) }, - // hom-var over het if GQ equal + // het over hom-var if GQ equal { "sample", Lists.newArrayList( @@ -694,11 +694,11 @@ public Object[][] collapseSampleGenotypesTestData() { Allele.REF_N, GenotypeBuilder.create( "sample", - Lists.newArrayList(Allele.SV_SIMPLE_INS, Allele.SV_SIMPLE_INS), + Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_INS), createGenotypeTestAttributes(2) ) }, - // het over hom-var if GQ is higher + // hom-var over het if GQ is higher { "sample", Lists.newArrayList( @@ -706,17 +706,17 @@ public 
Object[][] collapseSampleGenotypesTestData() { Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_INS) ), Lists.newArrayList( - createGenotypeTestAttributesWithGQ(2, 30), - createGenotypeTestAttributesWithGQ(2, 40) + createGenotypeTestAttributesWithGQ(2, 40), + createGenotypeTestAttributesWithGQ(2, 30) ), Allele.REF_N, GenotypeBuilder.create( "sample", - Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_INS), + Lists.newArrayList(Allele.SV_SIMPLE_INS, Allele.SV_SIMPLE_INS), createGenotypeTestAttributesWithGQ(2, 40) ) }, - // hom var preferred over hom-ref too + // het preferred over hom-ref too { "sample", Lists.newArrayList( @@ -732,7 +732,7 @@ public Object[][] collapseSampleGenotypesTestData() { Allele.REF_N, GenotypeBuilder.create( "sample", - Lists.newArrayList(Allele.SV_SIMPLE_INS, Allele.SV_SIMPLE_INS), + Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_INS), createGenotypeTestAttributesWithGQ(2, 30) ) }, @@ -791,7 +791,7 @@ public Object[][] collapseSampleGenotypesTestData() { Allele.REF_N, GenotypeBuilder.create( "sample", - Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_INS, Allele.SV_SIMPLE_INS), + Lists.newArrayList(Allele.REF_N, Allele.REF_N, Allele.SV_SIMPLE_INS), createGenotypeTestAttributes(3) ) }, @@ -813,7 +813,7 @@ public Object[][] collapseSampleGenotypesTestData() { Allele.REF_N, GenotypeBuilder.create( "sample", - Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_INS, Allele.SV_SIMPLE_INS), + Lists.newArrayList(Allele.REF_N, Allele.REF_N, Allele.SV_SIMPLE_INS), createGenotypeTestAttributes(3) ) }, @@ -841,7 +841,7 @@ public Object[][] collapseSampleGenotypesTestData() { Allele.REF_N, GenotypeBuilder.create( "sample", - Lists.newArrayList(Allele.SV_SIMPLE_INS, Allele.SV_SIMPLE_INS, Allele.SV_SIMPLE_INS), + Lists.newArrayList(Allele.REF_N, Allele.REF_N, Allele.SV_SIMPLE_INS), createGenotypeTestAttributes(3) ) }, @@ -1083,7 +1083,7 @@ public Object[][] collapseSampleGenotypesTestData() { createGenotypeTestAttributes(1, 1) ) }, - // Multi-allelic CNV, rare case with equal CNQ take CN!=ECN + // Multi-allelic CNV, with no CNQ use copy state closest to ref { "sample", Lists.newArrayList( @@ -1091,35 +1091,34 @@ public Object[][] collapseSampleGenotypesTestData() { Collections.singletonList(Allele.NO_CALL) ), Lists.newArrayList( - createGenotypeTestAttributesWithCNQ(1, 1, 30), - createGenotypeTestAttributesWithCNQ(1, 0, 30) + createGenotypeTestAttributes(1, 1), + createGenotypeTestAttributes(1, 2) ), Allele.REF_N, GenotypeBuilder.create( "sample", Lists.newArrayList(Allele.NO_CALL), - createGenotypeTestAttributesWithCNQ(1, 0, 30) + createGenotypeTestAttributes(1, 1) ) }, - // Multi-allelic CNV, haploid dup + // Multi-allelic CNV, when CNQ equal use copy state closest to ref { "sample", Lists.newArrayList( - Collections.singletonList(Allele.NO_CALL), - Collections.singletonList(Allele.NO_CALL) + Lists.newArrayList(Allele.NO_CALL, Allele.NO_CALL), + Lists.newArrayList(Allele.NO_CALL, Allele.NO_CALL) ), Lists.newArrayList( - createGenotypeTestAttributes(1, 1), - createGenotypeTestAttributes(1, 2) + createGenotypeTestAttributesWithCNQ(1, 1, 30), + createGenotypeTestAttributesWithCNQ(1, 2, 30) ), Allele.REF_N, GenotypeBuilder.create( "sample", - Lists.newArrayList(Allele.NO_CALL), - createGenotypeTestAttributes(1, 2) + Lists.newArrayList(Allele.NO_CALL, Allele.NO_CALL), + createGenotypeTestAttributesWithCNQ(1, 1, 30) ) }, - // Multi-allelic CNV, when CNQ equal use CN!=ECN { "sample", Lists.newArrayList( @@ -1128,16 +1127,32 @@ public Object[][] collapseSampleGenotypesTestData() 
{ ), Lists.newArrayList( createGenotypeTestAttributesWithCNQ(2, 2, 30), - createGenotypeTestAttributesWithCNQ(2, 1, 30) + createGenotypeTestAttributesWithCNQ(2, 3, 30) ), Allele.REF_N, GenotypeBuilder.create( "sample", Lists.newArrayList(Allele.NO_CALL, Allele.NO_CALL), + createGenotypeTestAttributesWithCNQ(2, 2, 30) + ) + }, + { + "sample", + Lists.newArrayList( + Lists.newArrayList(Allele.NO_CALL, Allele.NO_CALL), + Lists.newArrayList(Allele.NO_CALL, Allele.NO_CALL) + ), + Lists.newArrayList( + createGenotypeTestAttributesWithCNQ(2, 2, 30), createGenotypeTestAttributesWithCNQ(2, 1, 30) + ), + Allele.REF_N, + GenotypeBuilder.create( + "sample", + Lists.newArrayList(Allele.NO_CALL, Allele.NO_CALL), + createGenotypeTestAttributesWithCNQ(2, 2, 30) ) }, - // Multi-allelic CNV, when CNQ equal use CN!=ECN { "sample", Lists.newArrayList( @@ -1152,7 +1167,41 @@ public Object[][] collapseSampleGenotypesTestData() { GenotypeBuilder.create( "sample", Lists.newArrayList(Allele.NO_CALL, Allele.NO_CALL), + createGenotypeTestAttributesWithCNQ(2, 2, 30) + ) + }, + { + "sample", + Lists.newArrayList( + Lists.newArrayList(Allele.NO_CALL, Allele.NO_CALL), + Lists.newArrayList(Allele.NO_CALL, Allele.NO_CALL) + ), + Lists.newArrayList( + createGenotypeTestAttributesWithCNQ(2, 1, 30), createGenotypeTestAttributesWithCNQ(2, 0, 30) + ), + Allele.REF_N, + GenotypeBuilder.create( + "sample", + Lists.newArrayList(Allele.NO_CALL, Allele.NO_CALL), + createGenotypeTestAttributesWithCNQ(2, 1, 30) + ) + }, + { + "sample", + Lists.newArrayList( + Lists.newArrayList(Allele.NO_CALL, Allele.NO_CALL), + Lists.newArrayList(Allele.NO_CALL, Allele.NO_CALL) + ), + Lists.newArrayList( + createGenotypeTestAttributesWithCNQ(2, 5, 30), + createGenotypeTestAttributesWithCNQ(2, 6, 30) + ), + Allele.REF_N, + GenotypeBuilder.create( + "sample", + Lists.newArrayList(Allele.NO_CALL, Allele.NO_CALL), + createGenotypeTestAttributesWithCNQ(2, 5, 30) ) }, // Multi-allelic CNV, conflicting del and dup genotypes determined by CNQ @@ -1190,6 +1239,58 @@ public Object[][] collapseSampleGenotypesTestData() { createGenotypeTestAttributesWithCNQ(1, 2, 50) ) }, + // DEL prioritized over DUP tiebreaker when same distance from expected copy state + { + "sample", + Lists.newArrayList( + Collections.singletonList(Allele.NO_CALL), + Collections.singletonList(Allele.NO_CALL) + ), + Lists.newArrayList( + createGenotypeTestAttributesWithCNQ(1, 2, 30), + createGenotypeTestAttributesWithCNQ(1, 0, 30) + ), + Allele.REF_N, + GenotypeBuilder.create( + "sample", + Lists.newArrayList(Allele.NO_CALL), + createGenotypeTestAttributesWithCNQ(1, 0, 30) + ) + }, + { + "sample", + Lists.newArrayList( + Collections.singletonList(Allele.NO_CALL), + Collections.singletonList(Allele.NO_CALL) + ), + Lists.newArrayList( + createGenotypeTestAttributesWithCNQ(2, 0, 30), + createGenotypeTestAttributesWithCNQ(2, 4, 30) + ), + Allele.REF_N, + GenotypeBuilder.create( + "sample", + Lists.newArrayList(Allele.NO_CALL), + createGenotypeTestAttributesWithCNQ(2, 0, 30) + ) + }, + { + "sample", + Lists.newArrayList( + Collections.singletonList(Allele.NO_CALL), + Collections.singletonList(Allele.NO_CALL) + ), + Lists.newArrayList( + createGenotypeTestAttributesWithCNQ(2, 1, 30), + createGenotypeTestAttributesWithCNQ(2, 3, 30) + ), + Allele.REF_N, + GenotypeBuilder.create( + "sample", + Lists.newArrayList(Allele.NO_CALL), + createGenotypeTestAttributesWithCNQ(2, 1, 30) + ) + }, }; } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/sv/SVClusterIntegrationTest.java 
b/src/test/java/org/broadinstitute/hellbender/tools/walkers/sv/SVClusterIntegrationTest.java index ec3e6e15f96..75016e8744b 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/sv/SVClusterIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/sv/SVClusterIntegrationTest.java @@ -460,11 +460,11 @@ public void testClusterSampleOverlap() { final int nonRefGenotypeCount = (int) variant.getGenotypes().stream().filter(g -> SVCallRecordUtils.isAltGenotype(g)).count(); Assert.assertEquals(nonRefGenotypeCount, 71); final int alleleCount = (int) variant.getGenotypes().stream().flatMap(g -> g.getAlleles().stream()).filter(SVCallRecordUtils::isAltAllele).count(); - Assert.assertEquals(alleleCount, 94); + Assert.assertEquals(alleleCount, 87); final Genotype g = variant.getGenotype("HG00129"); - Assert.assertTrue(g.isHomVar()); + Assert.assertTrue(g.isHet()); Assert.assertEquals(VariantContextGetters.getAttributeAsInt(g, GATKSVVCFConstants.EXPECTED_COPY_NUMBER_FORMAT, -1), 2); - Assert.assertEquals(VariantContextGetters.getAttributeAsInt(g, GATKSVVCFConstants.COPY_NUMBER_FORMAT, -1), 0); + Assert.assertEquals(VariantContextGetters.getAttributeAsInt(g, GATKSVVCFConstants.COPY_NUMBER_FORMAT, -1), 1); } } Assert.assertEquals(expectedRecordsFound, 1); From ee25c7b0a3ef7eb417d8211e1b803fcd0b7124bb Mon Sep 17 00:00:00 2001 From: Louis Bergelson Date: Tue, 10 Dec 2024 16:45:38 -0500 Subject: [PATCH 2/5] Updating upload_artifact in github actions (#9061) Update upload_artifact from 3 -> 4 --- .github/actions/upload-gatk-test-results/action.yml | 2 +- .github/workflows/gatk-tests.yml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/actions/upload-gatk-test-results/action.yml b/.github/actions/upload-gatk-test-results/action.yml index e2c267e2c75..73cce8c0f8d 100644 --- a/.github/actions/upload-gatk-test-results/action.yml +++ b/.github/actions/upload-gatk-test-results/action.yml @@ -34,7 +34,7 @@ runs: steps: - name: Upload test results if: always() - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: # ternary expression syntax is workaround found here https://github.com/actions/runner/issues/409 name: test-results-${{ inputs.is-docker == 'true' && 'docker-' || '' }}${{ matrix.Java }}-${{ matrix.testType }} diff --git a/.github/workflows/gatk-tests.yml b/.github/workflows/gatk-tests.yml index 15a550e859e..b6b70e2e265 100644 --- a/.github/workflows/gatk-tests.yml +++ b/.github/workflows/gatk-tests.yml @@ -64,7 +64,7 @@ jobs: outputs: type=docker,dest=/tmp/myimage.tar # By uploading the docker image as an artifact we save ourselves the requirement for the image to be built with ghcr push permission - name: Upload docker image as artifact - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: gatkDockerImage path: /tmp/myimage.tar @@ -204,7 +204,7 @@ jobs: cp scripts/docker/dockertest.gradle . 
- name: Download docker image artifact - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: gatkDockerImage path: /tmp From 373df456da85d3a4520059c69af8b42e768941f4 Mon Sep 17 00:00:00 2001 From: Louis Bergelson Date: Wed, 15 Jan 2025 17:23:11 -0500 Subject: [PATCH 3/5] Close a FeatureReader after use in HaplotypeCallerEngine (#9078) --- .../tools/walkers/haplotypecaller/HaplotypeCallerEngine.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngine.java index 36918f1fb91..91564059e2d 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngine.java @@ -236,8 +236,9 @@ public HaplotypeCallerEngine(final HaplotypeCallerArgumentCollection hcArgs, Ass // Parse the user provided custom ploidy regions into ploidyRegions object containing SimpleCounts if (this.hcArgs.ploidyRegions != null) { - FeatureDataSource ploidyDataSource = new FeatureDataSource<>(this.hcArgs.ploidyRegions, FeatureDataSource.DEFAULT_QUERY_LOOKAHEAD_BASES, NamedFeature.class); - ploidyDataSource.forEach(r -> this.ploidyRegions.add(new SimpleCount(r))); + try (FeatureDataSource ploidyDataSource = new FeatureDataSource<>(this.hcArgs.ploidyRegions, FeatureDataSource.DEFAULT_QUERY_LOOKAHEAD_BASES, NamedFeature.class)) { + ploidyDataSource.forEach(r -> this.ploidyRegions.add(new SimpleCount(r))); + } } for (SimpleCount region : this.ploidyRegions) { From c819ff6d08b0ed9e168187f0ef09d0ec879bbcc4 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 16 Jan 2025 00:03:53 -0500 Subject: [PATCH 4/5] Bump ch.qos.logback:logback-core from 1.4.14 to 1.5.13 (#9079) * Bump ch.qos.logback:logback-core from 1.4.14 to 1.5.13 Bumps [ch.qos.logback:logback-core](https://github.com/qos-ch/logback) from 1.4.14 to 1.5.13. - [Commits](https://github.com/qos-ch/logback/compare/v_1.4.14...v_1.5.13) --- updated-dependencies: - dependency-name: ch.qos.logback:logback-core dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] * also update logback-classic to keep it in sync --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Louis Bergelson --- build.gradle | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/build.gradle b/build.gradle index 184dec2632c..ae83d138266 100644 --- a/build.gradle +++ b/build.gradle @@ -358,8 +358,8 @@ dependencies { implementation 'org.apache.commons:commons-compress:1.26.0' implementation 'org.apache.ivy:ivy:2.5.2' implementation 'org.apache.commons:commons-text:1.10.0' because 'of https://nvd.nist.gov/vuln/detail/CVE-2022-42889' - implementation 'ch.qos.logback:logback-classic:1.4.14' - implementation 'ch.qos.logback:logback-core:1.4.14' + implementation 'ch.qos.logback:logback-classic:1.5.13' + implementation 'ch.qos.logback:logback-core:1.5.13' implementation 'org.apache.avro:avro:1.12.0' implementation 'io.airlift:aircompressor:0.27' implementation 'org.scala-lang:scala-library:2.13.14' From 717d8a95f4ba4ecae67d084c5709ba543f1b1497 Mon Sep 17 00:00:00 2001 From: Louis Bergelson Date: Thu, 16 Jan 2025 16:33:54 -0500 Subject: [PATCH 5/5] Remove unecessary calculation and methods in FlowBasedRead (#9077) * remove the field FlowBaseRead.forwardSequence was calculated at non-trivial cost and then only used for it's length * removed the method seqlength because it was a more convoluted of getLength * fixed some typos and finals because I couldn't help myself --- .../FlowBasedAlignmentLikelihoodEngine.java | 2 +- .../hellbender/utils/read/FlowBasedRead.java | 49 ++++++------------- .../read/FlowBasedReadIntegrationTest.java | 2 +- .../utils/read/FlowBasedReadUnitTest.java | 4 +- 4 files changed, 18 insertions(+), 39 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/FlowBasedAlignmentLikelihoodEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/FlowBasedAlignmentLikelihoodEngine.java index fa172768608..66914a4d82d 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/FlowBasedAlignmentLikelihoodEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/FlowBasedAlignmentLikelihoodEngine.java @@ -264,7 +264,7 @@ public double haplotypeReadMatching(final FlowBasedHaplotype haplotype, final Fl read.getTrimmedEnd()).getLeft(); final int haplotypeLength = haplotypeEnd - haplotypeStart; - final int readLength = read.seqLength(); + final int readLength = read.getLength(); //in case there is a deletion on the haplotype and hte read falls inside the deletion (thus length of the read is diff --git a/src/main/java/org/broadinstitute/hellbender/utils/read/FlowBasedRead.java b/src/main/java/org/broadinstitute/hellbender/utils/read/FlowBasedRead.java index aaf326fa5e8..a461c3fa79c 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/read/FlowBasedRead.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/read/FlowBasedRead.java @@ -26,10 +26,10 @@ * is coded in the tags of the BAM and is given in flow space). This code is not used in production, but was used in * development and testing * - * A common usage pattern is to covert a GATKRead into a FlowBasedRead. Additionally + * A common usage pattern is to covert a GATKRead into a FlowBasedRead. Additionally, * a SAMRecord can also be converted into a FlowBasedRead. 
Follows a common usage pattern: * - * For a self contained example of a usage pattern, see {@link FlowBasedReadUtils#convertToFlowBasedRead(GATKRead, SAMFileHeader)} + * For a self-contained example of a usage pattern, see {@link FlowBasedReadUtils#convertToFlowBasedRead(GATKRead, SAMFileHeader)} * **/ @@ -72,22 +72,17 @@ public class FlowBasedRead extends SAMRecordToGATKReadAdapter implements GATKRea /** * The sam record from which this flow based read originated */ - private SAMRecord samRecord; + private final SAMRecord samRecord; /** - * The read's sequence, always in forward direction - */ - private byte[] forwardSequence; - - /** - * The flow key for the read - i.e. lengths of hmers in an flow order. + * The flow key for the read - i.e. lengths of hmers in flow order. * * For example, assuming a flow order of TGCA, and a forward sequence of GGAAT, the key will be 0,2,0,2,1 */ private int[] key; /** - * the maping of key elements to their origin locations in the sequence. Entry n contains the offset in the sequence + * the mapping of key elements to their origin locations in the sequence. Entry n contains the offset in the sequence * where the hmer described by this key element starts. */ private int [] flow2base; @@ -95,7 +90,7 @@ public class FlowBasedRead extends SAMRecordToGATKReadAdapter implements GATKRea /** * The maximal length of an hmer that can be encoded (normally in the 10-12 range) */ - private int maxHmer; + private final int maxHmer; /** * The value to fill the flow matrix with. Normally 0.001 @@ -104,20 +99,20 @@ public class FlowBasedRead extends SAMRecordToGATKReadAdapter implements GATKRea private double perHmerMinErrorProb; /** - * The order in which flow key in encoded (See decription for key field). Flow order may be wrapped if a longer one + * The order in which flow key in encoded (See description for key field). Flow order may be wrapped if a longer one * needed. */ private byte[] flowOrder; /** - * The probability matrix for this read. [n][m] position represents that probablity that an hmer of n length will be - * present at the m key position. Therefore, the first dimention is in the maxHmer order, where the second dimension + * The probability matrix for this read. [n][m] position represents that probability that an hmer of n length will be + * present at the m key position. Therefore, the first dimension is in the maxHmer order, where the second dimension * is length(key). */ private double[][] flowMatrix; /** - * The validity status of the key. Certain operations may produce undefined/errornous results. This is signaled by + * The validity status of the key. Certain operations may produce undefined/erroneous results. 
This is signaled by * the read being marked with a validKey == false */ private boolean validKey = true; @@ -199,7 +194,7 @@ public FlowBasedRead(final GATKRead read, final String flowOrder, final int maxH * @param samRecord record from SAM file * @param flowOrder flow order (single cycle) * @param maxHmer maximal hmer to keep in the flow matrix - * @param fbargs arguments that control resoltion of the flow matrix + * @param fbargs arguments that control resolution of the flow matrix */ public FlowBasedRead(final SAMRecord samRecord, final String flowOrder, final int maxHmer, final FlowBasedArgumentCollection fbargs) { super(samRecord); @@ -208,9 +203,8 @@ public FlowBasedRead(final SAMRecord samRecord, final String flowOrder, final in this.fbargs = fbargs; this.maxHmer = maxHmer; this.samRecord = samRecord; - forwardSequence = getForwardSequence(); - // read flow matrix in. note that below code contains accomodates for old formats + // read flow matrix in. note that below code contains accommodates for old formats if ( samRecord.hasAttribute(FLOW_MATRIX_TAG_NAME) ) { perHmerMinErrorProb = fbargs.fillingValue; totalMinErrorProb = perHmerMinErrorProb; @@ -408,18 +402,6 @@ public Direction getDirection(){ } - private byte[] getForwardSequence(){ - if (!isReverseStrand()) { - return samRecord.getReadBases(); - } else { - final byte[] result = new byte[samRecord.getReadBases().length]; - System.arraycopy(samRecord.getReadBases(), 0, result, 0, result.length); - SequenceUtil.reverseComplement(result); - return result; - } - } - - private int[] getAttributeAsIntArray(final String attributeName, final boolean isSigned) { ReadUtils.assertAttributeNameIsLegal(attributeName); final Object attributeValue = this.samRecord.getAttribute(attributeName); @@ -460,7 +442,7 @@ public boolean isValid() { * @return */ public double getProb(final int flow, final int hmer) { - double prob = flowMatrix[hmer < maxHmer ? hmer : maxHmer][flow]; + double prob = flowMatrix[Math.min(hmer, maxHmer)][flow]; return (prob <= 1) ? prob : 1; } @@ -525,7 +507,7 @@ private void implementMatrixMods(final int[] flowMatrixModsInstructions) { flowMatrix[hmer2][pos] = flowMatrix[hmer][pos]; } - // if we are copying bacwards, zero out source + // if we are copying backwards, zero out source if (hmer > hmer2) flowMatrix[hmer][pos] = 0; } @@ -783,9 +765,6 @@ public int totalKeyBases() { return sum; } - public int seqLength(){ - return forwardSequence.length; - } public boolean isBaseClipped() { return baseClipped; } diff --git a/src/test/java/org/broadinstitute/hellbender/utils/read/FlowBasedReadIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/utils/read/FlowBasedReadIntegrationTest.java index 311dfd15d10..b1bf0e75f32 100644 --- a/src/test/java/org/broadinstitute/hellbender/utils/read/FlowBasedReadIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/utils/read/FlowBasedReadIntegrationTest.java @@ -55,7 +55,7 @@ public void testReads(final String inputFile, final String outputPrefix, final S new SAMRecordToGATKReadAdapter(i.next()), reader.getFileHeader()); fbr.applyAlignment(); - Assert.assertEquals(fbr.totalKeyBases(), fbr.seqLength()); + Assert.assertEquals(fbr.totalKeyBases(), fbr.getLength()); if ( limitCount < 1000 && outputPrefix != null ) { try ( final FileWriter fos = new FileWriter(outputPrefix + "." 
+ Integer.toString(count) + ".key.txt") ) { diff --git a/src/test/java/org/broadinstitute/hellbender/utils/read/FlowBasedReadUnitTest.java b/src/test/java/org/broadinstitute/hellbender/utils/read/FlowBasedReadUnitTest.java index 62fa3949ee0..417750a53bd 100644 --- a/src/test/java/org/broadinstitute/hellbender/utils/read/FlowBasedReadUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/utils/read/FlowBasedReadUnitTest.java @@ -44,7 +44,7 @@ void testBAMFormatParsing() throws Exception{ String expectedFile = outputDir + "sample." + curRead + ".key.txt"; if (!UPDATE_EXACT_MATCH_EXPECTED_OUTPUTS) { - Assert.assertEquals(fbr.totalKeyBases(), fbr.seqLength()); + Assert.assertEquals(fbr.totalKeyBases(), fbr.getLength()); try (FileWriter fos = new FileWriter(tempOutputDir + "/" + curRead + ".key.txt")) { fbr.writeKey(fos); } @@ -91,7 +91,7 @@ void testBAMFormatParsingWithT0() throws Exception{ String expectedFile = outputDir + "sample.t0." + curRead + ".key.txt"; if ( !UPDATE_EXACT_MATCH_EXPECTED_OUTPUTS ) { - Assert.assertEquals(fbr.totalKeyBases(), fbr.seqLength()); + Assert.assertEquals(fbr.totalKeyBases(), fbr.getLength()); try (FileWriter fos = new FileWriter(tempOutputDir + "/" + curRead + ".key.txt")) { fbr.writeKey(fos); }
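
The net effect of the comparator changes in PATCH 1/5 is easiest to see with a small worked example. The sketch below is illustration only, not GATK code: SimpleGenotype is a hypothetical stand-in for htsjdk's Genotype, the constant names are invented, and it assumes the chain begins with the non-ref and called comparators and is resolved by taking the stream maximum, since those parts of getRepresentativeGenotype fall outside the diff context shown above. Each comparator is oriented so that "greater" means "preferred".

import java.util.Comparator;
import java.util.List;

public class RepresentativeGenotypeSketch {

    // Hypothetical stand-in for htsjdk's Genotype; alleles are "REF", "ALT", or "." for no-call.
    record SimpleGenotype(String name, List<String> alleles, int gq,
                          int expectedCopyNumber, int copyNumber, int cnq) {
        long altCount()    { return alleles.stream().filter("ALT"::equals).count(); }
        long calledCount() { return alleles.stream().filter(a -> !".".equals(a)).count(); }
        boolean isDel()    { return copyNumber < expectedCopyNumber; }
    }

    // Each comparator ranks the preferred genotype as "greater".
    static final Comparator<SimpleGenotype> NON_REF_FIRST    = Comparator.comparingLong(g -> Math.min(1, g.altCount()));
    static final Comparator<SimpleGenotype> CALLED_FIRST     = Comparator.comparingLong(SimpleGenotype::calledCount);
    static final Comparator<SimpleGenotype> HIGHER_GQ        = Comparator.comparingInt(SimpleGenotype::gq);
    static final Comparator<SimpleGenotype> FEWER_ALTS       = Comparator.comparingLong(g -> -g.altCount());                          // het over hom-var
    static final Comparator<SimpleGenotype> HIGHER_CNQ       = Comparator.comparingInt(SimpleGenotype::cnq);
    static final Comparator<SimpleGenotype> CN_NEAR_EXPECTED = Comparator.comparingInt(g -> -Math.abs(g.expectedCopyNumber() - g.copyNumber()));
    static final Comparator<SimpleGenotype> DEL_OVER_DUP     = Comparator.comparingInt(g -> g.isDel() ? 1 : 0);                       // final tiebreaker

    // Chain mirrors the priority order of the patched comparator chain.
    static final Comparator<SimpleGenotype> PRIORITY = NON_REF_FIRST
            .thenComparing(CALLED_FIRST)
            .thenComparing(HIGHER_GQ)
            .thenComparing(FEWER_ALTS)
            .thenComparing(HIGHER_CNQ)
            .thenComparing(CN_NEAR_EXPECTED)
            .thenComparing(DEL_OVER_DUP);

    public static void main(String[] args) {
        final SimpleGenotype homRef = new SimpleGenotype("homRef", List.of("REF", "REF"), 30, 2, 2, 0);
        final SimpleGenotype het    = new SimpleGenotype("het",    List.of("REF", "ALT"), 20, 2, 1, 0);
        final SimpleGenotype homVar = new SimpleGenotype("homVar", List.of("ALT", "ALT"), 20, 2, 0, 0);

        // The maximum under PRIORITY is the representative genotype for the merged record.
        System.out.println(List.of(homRef, het, homVar).stream().max(PRIORITY).orElseThrow().name());
    }
}

Running this prints "het": with the reversed Long.compare and Math.abs comparisons, a het call now beats a hom-ref call with higher GQ as well as a hom-var call with equal GQ, which is the behavior the updated unit and integration tests in this patch assert.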