From 8fbd27e8bf2ced361c9125acde74d2f0936c0cd7 Mon Sep 17 00:00:00 2001 From: Karan Jaisingh Date: Mon, 13 Jan 2025 10:55:00 -0500 Subject: [PATCH] Added sex revisions for male GT --- .../spark/sv/utils/GATKSVVCFConstants.java | 21 ++-- .../walkers/sv/SVReviseAbnormalAllosomes.java | 13 ++- .../tools/walkers/sv/SVReviseLargeCnvs.java | 41 ++++--- .../walkers/sv/SVReviseMultiallelicCnvs.java | 22 ++-- .../walkers/sv/SVReviseOverlappingCnvCns.java | 107 +++++++++--------- .../walkers/sv/SVReviseOverlappingCnvGts.java | 58 +++++----- 6 files changed, 130 insertions(+), 132 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/utils/GATKSVVCFConstants.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/utils/GATKSVVCFConstants.java index 3b06d240e19..5e2920f8204 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/utils/GATKSVVCFConstants.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/utils/GATKSVVCFConstants.java @@ -165,35 +165,30 @@ public enum ComplexVariantSubtype { public static final String LOW_QS_SCORE_FILTER_KEY = "LOW_QS"; public static final String FREQUENCY_FILTER_KEY = "FREQ"; - // SVCleanPt1a - public static final String EV = "EV"; - public static final List EV_VALUES = Arrays.asList( - null, "RD", "PE", "RD,PE", "SR", "RD,SR", "PE,SR", "RD,PE,SR" - ); + // CleanVcf public static final String ME = "ME"; public static final String VAR_GQ = "varGQ"; public static final String MULTIALLELIC = "MULTIALLELIC"; public static final String UNRESOLVED = "UNRESOLVED"; public static final String HIGH_SR_BACKGROUND = "HIGH_SR_BACKGROUND"; public static final String BOTHSIDES_SUPPORT = "BOTHSIDES_SUPPORT"; + public static final String PESR_GT_OVERDISPERSION = "PESR_GT_OVERDISPERSION"; public static final String REVISED_EVENT = "REVISED_EVENT"; - public static final String RD_CN = "RD_CN"; - - // SVCleanPt1b - public static final String RD_GQ = "RD_GQ"; public static final String MULTI_CNV = "MULTI_CNV"; - // SVCleanPt4 - public static final String PESR_GT_OVERDISPERSION = "PESR_GT_OVERDISPERSION"; + public static final String RD_CN = "RD_CN"; + public static final String RD_GQ = "RD_GQ"; public static final String PE_GT = "PE_GT"; public static final String SR_GT = "SR_GT"; public static final String PE_GQ = "PE_GQ"; public static final String SR_GQ = "SR_GQ"; public static final String CNV = "CNV"; - - // SVCleanPt5 public static final String UNR = "UNR"; public static final String EVENT = "EVENT"; + public static final String EV = "EV"; + public static final List EV_VALUES = Arrays.asList( + null, "RD", "PE", "RD,PE", "SR", "RD,SR", "PE,SR", "RD,PE,SR" + ); public static final Set FILTER_VCF_LINES = new HashSet<>(Arrays.asList( "CIPOS", "CIEND", "RMSSTD", "source", "bcftools", "GATKCommandLine", "#CHROM" )); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/SVReviseAbnormalAllosomes.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/SVReviseAbnormalAllosomes.java index 38f536eee63..fecb8860445 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/SVReviseAbnormalAllosomes.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/SVReviseAbnormalAllosomes.java @@ -81,18 +81,19 @@ public void closeTool() { } @Override - public void apply(final VariantContext variant, final ReadsContext readsContext, final ReferenceContext referenceContext, final FeatureContext featureContext) { - VariantContextBuilder builder = new VariantContextBuilder(variant); - if (!variant.getAttributeAsBoolean(GATKSVVCFConstants.REVISED_EVENT, false)) { + public void apply(final VariantContext variant, final ReadsContext readsContext, + final ReferenceContext referenceContext, final FeatureContext featureContext) { + final VariantContextBuilder builder = new VariantContextBuilder(variant); + if (variant.getAttributeAsBoolean(GATKSVVCFConstants.REVISED_EVENT, false)) { processRevisedSex(variant, builder); } vcfWriter.add(builder.make()); } private void processRevisedSex(final VariantContext variant, final VariantContextBuilder builder) { - List genotypes = variant.getGenotypes(); - List updatedGenotypes = new ArrayList<>(genotypes.size()); - for (Genotype genotype : genotypes) { + final List genotypes = variant.getGenotypes(); + final List updatedGenotypes = new ArrayList<>(genotypes.size()); + for (final Genotype genotype : genotypes) { if (Integer.parseInt(genotype.getExtendedAttribute(GATKSVVCFConstants.RD_CN, 0).toString()) > 0) { int newRdCn = Integer.parseInt(genotype.getExtendedAttribute(GATKSVVCFConstants.RD_CN).toString()) - 1; GenotypeBuilder gb = new GenotypeBuilder(genotype); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/SVReviseLargeCnvs.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/SVReviseLargeCnvs.java index ed8c892a10a..508ea310eb4 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/SVReviseLargeCnvs.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/SVReviseLargeCnvs.java @@ -134,7 +134,7 @@ public void closeTool() { @Override public void apply(final VariantContext variant, final ReadsContext readsContext, final ReferenceContext referenceContext, final FeatureContext featureContext) { // Initialize data structures - VariantContextBuilder builder = new VariantContextBuilder(variant); + final VariantContextBuilder builder = new VariantContextBuilder(variant); List genotypes = variant.getGenotypes(); // Process variants @@ -152,9 +152,9 @@ public void apply(final VariantContext variant, final ReadsContext readsContext, private void processMultiallelic(final VariantContextBuilder builder, final List genotypes) { int numGtOver2 = 0; for (Genotype genotype : genotypes) { - Integer peGt = genotype.hasExtendedAttribute(GATKSVVCFConstants.PE_GT) ? + final Integer peGt = genotype.hasExtendedAttribute(GATKSVVCFConstants.PE_GT) ? Integer.parseInt(genotype.getExtendedAttribute(GATKSVVCFConstants.PE_GT).toString()) : null; - Integer srGt = genotype.hasExtendedAttribute(GATKSVVCFConstants.SR_GT) ? + final Integer srGt = genotype.hasExtendedAttribute(GATKSVVCFConstants.SR_GT) ? Integer.parseInt(genotype.getExtendedAttribute(GATKSVVCFConstants.SR_GT).toString()) : null; int gt; if (peGt == null) { @@ -166,9 +166,9 @@ private void processMultiallelic(final VariantContextBuilder builder, final List } else if (peGt == 0) { gt = srGt; } else { - Integer peGq = genotype.hasExtendedAttribute(GATKSVVCFConstants.PE_GQ) ? + final Integer peGq = genotype.hasExtendedAttribute(GATKSVVCFConstants.PE_GQ) ? Integer.parseInt(genotype.getExtendedAttribute(GATKSVVCFConstants.PE_GQ).toString()) : null; - Integer srGq = genotype.hasExtendedAttribute(GATKSVVCFConstants.SR_GQ) ? + final Integer srGq = genotype.hasExtendedAttribute(GATKSVVCFConstants.SR_GQ) ? Integer.parseInt(genotype.getExtendedAttribute(GATKSVVCFConstants.SR_GQ).toString()) : null; if (peGq != null && srGq != null && peGq >= srGq) { gt = peGt; @@ -177,7 +177,7 @@ private void processMultiallelic(final VariantContextBuilder builder, final List } } if (gt > 2) { - numGtOver2++; + numGtOver2 += 1; } } if (numGtOver2 > maxVF) { @@ -204,7 +204,7 @@ private List processLargeDeletions(final VariantContext variant, final } boolean gt5kbFilter = false; - List allowedAlleleIndices = Arrays.asList(-1, 0, 1); + final List allowedAlleleIndices = Arrays.asList(-1, 0, 1); if (genotypes.stream().anyMatch(g -> g.getAlleles().stream().anyMatch(a -> !allowedAlleleIndices.contains(variant.getAlleleIndex(a))))) { gt5kbFilter = true; } else if (variant.getAttributeAsInt(GATKSVVCFConstants.SVLEN, 0) >= MIN_MULTIALLELIC_EVENT_SIZE && !multiallelicFilter) { @@ -213,8 +213,8 @@ private List processLargeDeletions(final VariantContext variant, final List updatedGenotypes = new ArrayList<>(genotypes.size()); if (gt5kbFilter) { - for (Genotype genotype : genotypes) { - GenotypeBuilder gb = new GenotypeBuilder(genotype); + for (final Genotype genotype : genotypes) { + final GenotypeBuilder gb = new GenotypeBuilder(genotype); if (!genotype.isNoCall()) { if (genotype.hasGQ() && Integer.parseInt(genotype.getExtendedAttribute(GATKSVVCFConstants.RD_CN, 0).toString()) >= 2) { gb.alleles(Arrays.asList(variant.getReference(), variant.getReference())); @@ -231,7 +231,7 @@ private List processLargeDeletions(final VariantContext variant, final updatedGenotypes = new ArrayList<>(genotypes.size()); if (multiallelicFilter) { - for (Genotype genotype : genotypes) { + for (final Genotype genotype : genotypes) { GenotypeBuilder gb = new GenotypeBuilder(genotype); gb.noGQ(); gb.alleles(Arrays.asList(Allele.NO_CALL)); @@ -257,7 +257,7 @@ private List processLargeDuplications(final VariantContext variant, fi boolean multiallelicFilter = false; if (variant.getAttributeAsInt(GATKSVVCFConstants.SVLEN, 0) >= MIN_LARGE_EVENT_SIZE) { Map sampleRdCn = new HashMap<>(); - for (Genotype genotype : genotypes) { + for (final Genotype genotype : genotypes) { if (!outlierSamples.contains(genotype.getSampleName()) && genotype.hasExtendedAttribute(GATKSVVCFConstants.RD_CN)) { sampleRdCn.put(genotype.getSampleName(), Integer.parseInt(genotype.getExtendedAttribute(GATKSVVCFConstants.RD_CN).toString())); } @@ -273,7 +273,7 @@ private List processLargeDuplications(final VariantContext variant, fi } boolean gt5kbFilter = false; - List allowedAlleleIndices = Arrays.asList(-1, 0, 1); + final List allowedAlleleIndices = Arrays.asList(-1, 0, 1); if (genotypes.stream().anyMatch(g -> g.getAlleles().stream().anyMatch(a -> !allowedAlleleIndices.contains(variant.getAlleleIndex(a))))) { gt5kbFilter = true; } else if (variant.getAttributeAsInt(GATKSVVCFConstants.SVLEN, 0) >= MIN_MULTIALLELIC_EVENT_SIZE && !multiallelicFilter) { @@ -282,8 +282,8 @@ private List processLargeDuplications(final VariantContext variant, fi List updatedGenotypes = new ArrayList<>(genotypes.size()); if (gt5kbFilter) { - for (Genotype genotype : genotypes) { - GenotypeBuilder gb = new GenotypeBuilder(genotype); + for (final Genotype genotype : genotypes) { + final GenotypeBuilder gb = new GenotypeBuilder(genotype); if (!genotype.isNoCall()) { if (genotype.hasGQ() && Integer.parseInt(genotype.getExtendedAttribute(GATKSVVCFConstants.RD_CN, 3).toString()) <= 2) { gb.alleles(Arrays.asList(variant.getReference(), variant.getReference())); @@ -300,8 +300,8 @@ private List processLargeDuplications(final VariantContext variant, fi updatedGenotypes = new ArrayList<>(genotypes.size()); if (multiallelicFilter) { - for (Genotype genotype : genotypes) { - GenotypeBuilder gb = new GenotypeBuilder(genotype); + for (final Genotype genotype : genotypes) { + final GenotypeBuilder gb = new GenotypeBuilder(genotype); gb.noGQ(); gb.alleles(Arrays.asList(Allele.NO_CALL)); gb.attribute(GATKSVVCFConstants.COPY_NUMBER_FORMAT, genotype.getExtendedAttribute(GATKSVVCFConstants.RD_CN)); @@ -319,16 +319,15 @@ private List processLargeDuplications(final VariantContext variant, fi } public boolean isCalled(final VariantContextBuilder builder, final List genotypes) { - for (Genotype genotype : genotypes) { + for (final Genotype genotype : genotypes) { if (!isNoCallGt(genotype.getAlleles())) { return true; } } if (builder.getAttributes().getOrDefault(GATKSVVCFConstants.SVTYPE, "").equals(GATKSVVCFConstants.CNV)) { - for (Genotype genotype : genotypes) { - final int cn = Integer.parseInt(genotype.getExtendedAttribute(GATKSVVCFConstants.COPY_NUMBER_FORMAT, 2).toString()); - if (cn != 2) { + for (final Genotype genotype : genotypes) { + if (Integer.parseInt(genotype.getExtendedAttribute(GATKSVVCFConstants.COPY_NUMBER_FORMAT, 2).toString()) != 2) { return true; } } @@ -337,7 +336,7 @@ public boolean isCalled(final VariantContextBuilder builder, final List alleles) { + private boolean isNoCallGt(final List alleles) { if (alleles.size() == 1 && alleles.get(0).isReference()) return true; else if (alleles.size() == 2 && alleles.get(0).isReference() && alleles.get(1).isReference()) return true; else if (alleles.size() == 1 && alleles.get(0).isNoCall()) return true; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/SVReviseMultiallelicCnvs.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/SVReviseMultiallelicCnvs.java index 2bcdfefc1cb..8da4fd855fa 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/SVReviseMultiallelicCnvs.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/SVReviseMultiallelicCnvs.java @@ -3,9 +3,6 @@ import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.variantcontext.VariantContextBuilder; import htsjdk.variant.variantcontext.writer.VariantContextWriter; -import htsjdk.variant.vcf.VCFHeader; -import htsjdk.variant.vcf.VCFHeaderLineType; -import htsjdk.variant.vcf.VCFFormatHeaderLine; import org.broadinstitute.barclay.argparser.Argument; import org.broadinstitute.barclay.argparser.BetaFeature; @@ -93,7 +90,8 @@ public void closeTool() { } @Override - protected void nthPassApply(final VariantContext variant, final ReadsContext readsContext, final ReferenceContext referenceContext, final FeatureContext featureContext, int n) { + protected void nthPassApply(final VariantContext variant, final ReadsContext readsContext, + final ReferenceContext referenceContext, final FeatureContext featureContext, int n) { switch (n) { case 0: firstPassApply(variant); @@ -111,7 +109,7 @@ public void firstPassApply(final VariantContext variant) { overlappingVariantsBuffer.removeIf(vc -> !vc.getContig().equals(variant.getContig()) || (vc.getStart() + vc.getAttributeAsInt(GATKSVVCFConstants.SVLEN, 0)) < variant.getStart()); - for (VariantContext bufferedVariant : overlappingVariantsBuffer) { + for (final VariantContext bufferedVariant : overlappingVariantsBuffer) { if (overlaps(bufferedVariant, variant)) { processVariantPair(bufferedVariant, variant); } @@ -124,16 +122,16 @@ public void secondPassApply(final VariantContext variant) { return; } - VariantContextBuilder builder = new VariantContextBuilder(variant); + final VariantContextBuilder builder = new VariantContextBuilder(variant); vcfWriter.add(builder.make()); } - private void processVariantPair(VariantContext v1, VariantContext v2) { + private void processVariantPair(final VariantContext v1, final VariantContext v2) { // Determine larger variant VariantContext largerVariant = v1; VariantContext smallerVariant = v2; - int length1 = v1.getAttributeAsInt(GATKSVVCFConstants.SVLEN, 0); - int length2 = v2.getAttributeAsInt(GATKSVVCFConstants.SVLEN, 0); + final int length1 = v1.getAttributeAsInt(GATKSVVCFConstants.SVLEN, 0); + final int length2 = v2.getAttributeAsInt(GATKSVVCFConstants.SVLEN, 0); int smallerLength = length2; // Swap variants if necessary @@ -148,14 +146,14 @@ private void processVariantPair(VariantContext v1, VariantContext v2) { largerVariant.getStart() + largerVariant.getAttributeAsInt(GATKSVVCFConstants.SVLEN, 0), smallerVariant.getStart() + smallerVariant.getAttributeAsInt(GATKSVVCFConstants.SVLEN, 0) ); - int maxStart = Math.max(largerVariant.getStart(), smallerVariant.getStart()); - int overlapLength = minEnd - maxStart + 1; + final int maxStart = Math.max(largerVariant.getStart(), smallerVariant.getStart()); + final int overlapLength = minEnd - maxStart + 1; if (overlapLength <= 0) { return; } // Filter variant based on conditions - double coverage = (double) overlapLength / smallerLength; + final double coverage = (double) overlapLength / smallerLength; if (coverage > 0.5 && !filteredVariantIds.contains(largerVariant.getID())) { filteredVariantIds.add(smallerVariant.getID()); } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/SVReviseOverlappingCnvCns.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/SVReviseOverlappingCnvCns.java index fc6f2219b14..f7e607a17a5 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/SVReviseOverlappingCnvCns.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/SVReviseOverlappingCnvCns.java @@ -5,9 +5,6 @@ import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.variantcontext.VariantContextBuilder; import htsjdk.variant.variantcontext.writer.VariantContextWriter; -import htsjdk.variant.vcf.VCFHeader; -import htsjdk.variant.vcf.VCFHeaderLineType; -import htsjdk.variant.vcf.VCFInfoHeaderLine; import org.broadinstitute.barclay.argparser.Argument; import org.broadinstitute.barclay.argparser.BetaFeature; @@ -102,7 +99,8 @@ public void closeTool() { protected void afterNthPass(int n) {} @Override - protected void nthPassApply(VariantContext variant, ReadsContext readsContext, ReferenceContext referenceContext, FeatureContext featureContext, int n) { + protected void nthPassApply(final VariantContext variant, final ReadsContext readsContext, + final ReferenceContext referenceContext, final FeatureContext featureContext, int n) { switch (n) { case 0: firstPassApply(variant); @@ -120,11 +118,12 @@ public void firstPassApply(final VariantContext variant) { } // Flag sample as having an abnormal copy number if it passes certain conditions - for (String sample : variant.getSampleNames()) { - Genotype genotype = variant.getGenotype(sample); - int rdCn = Integer.parseInt(genotype.getExtendedAttribute(GATKSVVCFConstants.RD_CN).toString()); + for (final String sample : variant.getSampleNames()) { + final Genotype genotype = variant.getGenotype(sample); + if (!genotype.hasExtendedAttribute(GATKSVVCFConstants.RD_CN)) continue; - String svType = variant.getAttributeAsString(GATKSVVCFConstants.SVTYPE, ""); + final int rdCn = Integer.parseInt(genotype.getExtendedAttribute(GATKSVVCFConstants.RD_CN).toString()); + final String svType = variant.getAttributeAsString(GATKSVVCFConstants.SVTYPE, ""); if ((svType.equals(GATKSVVCFConstants.SYMB_ALT_STRING_DEL) && rdCn < 2) || (svType.equals(GATKSVVCFConstants.SYMB_ALT_STRING_DUP) && rdCn > 2)) { abnormalRdCn.computeIfAbsent(variant.getID(), k -> new HashSet<>()).add(sample); } @@ -133,7 +132,7 @@ public void firstPassApply(final VariantContext variant) { // Process overlaps with variants in the buffer overlappingVariantsBuffer.removeIf(vc -> !vc.getContig().equals(variant.getContig()) || (vc.getStart() + vc.getAttributeAsInt(GATKSVVCFConstants.SVLEN, 0)) < variant.getStart()); - for (VariantContext bufferedVariant : overlappingVariantsBuffer) { + for (final VariantContext bufferedVariant : overlappingVariantsBuffer) { if (overlaps(variant, bufferedVariant)) { adjustCopyNumber(variant, bufferedVariant); } @@ -151,10 +150,10 @@ public void secondPassApply(final VariantContext variant) { private void adjustCopyNumber(final VariantContext v1, final VariantContext v2) { // Determine larger variant + final int length1 = v1.getAttributeAsInt(GATKSVVCFConstants.SVLEN, 0); + final int length2 = v2.getAttributeAsInt(GATKSVVCFConstants.SVLEN, 0); VariantContext largerVariant = v1; VariantContext smallerVariant = v2; - int length1 = v1.getAttributeAsInt(GATKSVVCFConstants.SVLEN, 0); - int length2 = v2.getAttributeAsInt(GATKSVVCFConstants.SVLEN, 0); int largerLength = length1; int smallerLength = length2; @@ -167,43 +166,43 @@ private void adjustCopyNumber(final VariantContext v1, final VariantContext v2) } // Get variant attributes - String variantId1 = largerVariant.getID(); - String variantId2 = smallerVariant.getID(); - Map variantRdCn1 = getRdCnForVariant(largerVariant); - Map variantRdCn2 = getRdCnForVariant(smallerVariant); - Map> variantSupport1 = getSupportForVariant(largerVariant); - Map> variantSupport2 = getSupportForVariant(smallerVariant); - String svType1 = largerVariant.getAttributeAsString(GATKSVVCFConstants.SVTYPE, ""); - String svType2 = smallerVariant.getAttributeAsString(GATKSVVCFConstants.SVTYPE, ""); + final String variantId1 = largerVariant.getID(); + final String variantId2 = smallerVariant.getID(); + final Map variantRdCn1 = getRdCnForVariant(largerVariant); + final Map variantRdCn2 = getRdCnForVariant(smallerVariant); + final Map> variantSupport1 = getSupportForVariant(largerVariant); + final Map> variantSupport2 = getSupportForVariant(smallerVariant); + final String svType1 = largerVariant.getAttributeAsString(GATKSVVCFConstants.SVTYPE, ""); + final String svType2 = smallerVariant.getAttributeAsString(GATKSVVCFConstants.SVTYPE, ""); // Calculate overlap - int minEnd = Math.min( + final int minEnd = Math.min( largerVariant.getStart() + largerVariant.getAttributeAsInt(GATKSVVCFConstants.SVLEN, 0), smallerVariant.getStart() + smallerVariant.getAttributeAsInt(GATKSVVCFConstants.SVLEN, 0) ); - int maxStart = Math.max(largerVariant.getStart(), smallerVariant.getStart()); - int lengthOverlap = minEnd - maxStart + 1; - double overlap1 = (double) lengthOverlap / (double) largerLength; - double overlap2 = (double) lengthOverlap / (double) smallerLength; + final int maxStart = Math.max(largerVariant.getStart(), smallerVariant.getStart()); + final int lengthOverlap = minEnd - maxStart + 1; + final double overlap1 = (double) lengthOverlap / (double) largerLength; + final double overlap2 = (double) lengthOverlap / (double) smallerLength; // Get samples with abnormal CN across both variants - Set samples = new HashSet<>(abnormalRdCn.getOrDefault(variantId1, Collections.emptySet())); + final Set samples = new HashSet<>(abnormalRdCn.getOrDefault(variantId1, Collections.emptySet())); samples.retainAll(abnormalRdCn.getOrDefault(variantId2, Collections.emptySet())); // Iterate through samples to test against conditions for (String sample : samples) { - String id1 = variantId1 + "@" + sample; - String id2 = variantId2 + "@" + sample; + final String id1 = variantId1 + "@" + sample; + final String id2 = variantId2 + "@" + sample; if (revisedComplete.contains(id1)) { continue; } // Initialize variables for evaluation - int rdCn1 = revisedCopyNumbers.getOrDefault(variantId1, Collections.emptyMap()).getOrDefault(sample, variantRdCn1.get(sample)); - int rdCn2 = revisedCopyNumbers.getOrDefault(variantId2, Collections.emptyMap()).getOrDefault(sample, variantRdCn2.get(sample)); - Set support1 = variantSupport1.get(sample); - Set support2 = variantSupport2.get(sample); - Genotype genotype2 = smallerVariant.getGenotype(sample); + final int rdCn1 = revisedCopyNumbers.getOrDefault(variantId1, Collections.emptyMap()).getOrDefault(sample, variantRdCn1.get(sample)); + final int rdCn2 = revisedCopyNumbers.getOrDefault(variantId2, Collections.emptyMap()).getOrDefault(sample, variantRdCn2.get(sample)); + final Set support1 = variantSupport1.get(sample); + final Set support2 = variantSupport2.get(sample); + final Genotype genotype2 = smallerVariant.getGenotype(sample); // Condition 1: Smaller depth call is being driven by larger call if (support1.contains(GATKSVVCFConstants.EV_VALUES.get(1)) && support1.size() > 1 @@ -267,17 +266,27 @@ else if (support1.contains(GATKSVVCFConstants.EV_VALUES.get(1)) } } + private void makeRevision(final String id, final int val) { + final String[] tokens = id.split("@"); + final String variantId = tokens[0]; + final String sample = tokens[1]; + revisedCopyNumbers.computeIfAbsent(variantId, k -> new HashMap<>()).put(sample, val); + if (val == 2) { + revisedComplete.add(id); + } + } + private void processRevisedCn(final VariantContextBuilder builder, final VariantContext variant) { // Initialize data structures final String variantID = variant.getID(); - List genotypes = builder.getGenotypes(); - List updatedGenotypes = new ArrayList<>(genotypes.size()); + final List genotypes = builder.getGenotypes(); + final List updatedGenotypes = new ArrayList<>(genotypes.size()); // Replace revised alleles and copy numbers - for (Genotype genotype : genotypes) { - String sampleName = genotype.getSampleName(); + for (final Genotype genotype : genotypes) { + final String sampleName = genotype.getSampleName(); if (revisedCopyNumbers.get(variantID).containsKey(sampleName)) { - GenotypeBuilder gb = new GenotypeBuilder(genotype); + final GenotypeBuilder gb = new GenotypeBuilder(genotype); gb.alleles(Arrays.asList(variant.getReference(), variant.getAlternateAllele(0))); gb.attribute(GATKSVVCFConstants.RD_CN, revisedCopyNumbers.get(variantID).get(sampleName)); updatedGenotypes.add(gb.make()); @@ -288,22 +297,12 @@ private void processRevisedCn(final VariantContextBuilder builder, final Variant builder.genotypes(updatedGenotypes); } - private void makeRevision(final String id, final int val) { - String[] tokens = id.split("@"); - String variantId = tokens[0]; - String sample = tokens[1]; - revisedCopyNumbers.computeIfAbsent(variantId, k -> new HashMap<>()).put(sample, val); - if (val == 2) { - revisedComplete.add(id); - } - } - private Map> getSupportForVariant(final VariantContext variant) { Map> supportMap = new HashMap<>(); for (String sample : variant.getSampleNames()) { - Genotype genotype = variant.getGenotype(sample); - String supportStr = genotype.hasExtendedAttribute(GATKSVVCFConstants.EV) ? genotype.getExtendedAttribute(GATKSVVCFConstants.EV).toString() : ""; - Set supportSet = new HashSet<>(); + final Genotype genotype = variant.getGenotype(sample); + final String supportStr = genotype.hasExtendedAttribute(GATKSVVCFConstants.EV) ? genotype.getExtendedAttribute(GATKSVVCFConstants.EV).toString() : ""; + final Set supportSet = new HashSet<>(); if (!supportStr.isEmpty()) { supportSet.addAll(Arrays.asList(supportStr.split(","))); } @@ -313,9 +312,9 @@ private Map> getSupportForVariant(final VariantContext varia } private Map getRdCnForVariant(final VariantContext variant) { - Map rdCnMap = new HashMap<>(); + final Map rdCnMap = new HashMap<>(); for (String sample : variant.getSampleNames()) { - Genotype genotype = variant.getGenotype(sample); + final Genotype genotype = variant.getGenotype(sample); if (genotype.hasExtendedAttribute(GATKSVVCFConstants.RD_CN)) { rdCnMap.put(sample, Integer.parseInt(genotype.getExtendedAttribute(GATKSVVCFConstants.RD_CN).toString())); } @@ -324,12 +323,12 @@ private Map getRdCnForVariant(final VariantContext variant) { } private boolean isDelDup(final VariantContext variant) { - String svType = variant.getAttributeAsString(GATKSVVCFConstants.SVTYPE, ""); + final String svType = variant.getAttributeAsString(GATKSVVCFConstants.SVTYPE, ""); return svType.equals(GATKSVVCFConstants.SYMB_ALT_STRING_DEL) || svType.equals(GATKSVVCFConstants.SYMB_ALT_STRING_DUP); } private boolean isLarge(final VariantContext variant, final int minSize) { - int variantLength = Math.abs(variant.getAttributeAsInt(GATKSVVCFConstants.SVLEN, 0)); + final int variantLength = Math.abs(variant.getAttributeAsInt(GATKSVVCFConstants.SVLEN, 0)); return variantLength >= minSize; } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/SVReviseOverlappingCnvGts.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/SVReviseOverlappingCnvGts.java index 7ed17a2918e..59798aa3444 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/SVReviseOverlappingCnvGts.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/SVReviseOverlappingCnvGts.java @@ -132,7 +132,7 @@ public void firstPassApply(final VariantContext variant) { // Process overlaps with variants in the buffer overlappingVariantsBuffer.removeIf(vc -> !vc.getContig().equals(variant.getContig()) || (vc.getStart() + vc.getAttributeAsInt(GATKSVVCFConstants.SVLEN, 0)) < variant.getStart()); - for (VariantContext bufferedVariant : overlappingVariantsBuffer) { + for (final VariantContext bufferedVariant : overlappingVariantsBuffer) { if (overlaps(bufferedVariant, variant)) { processOverlap(bufferedVariant, variant); } @@ -153,14 +153,16 @@ public void secondPassApply(final VariantContext variant) { // Initialize revisedRdCn value for each variant for (final String sampleName : samples) { final Genotype genotype = variant.getGenotype(sampleName); - final String rdCn = (String) genotype.getExtendedAttribute(GATKSVVCFConstants.RD_CN); - variantRdCn.put(sampleName, Integer.parseInt(rdCn)); + if (!genotype.hasExtendedAttribute(GATKSVVCFConstants.RD_CN)) continue; + + final int rdCn = Integer.parseInt(genotype.getExtendedAttribute(GATKSVVCFConstants.RD_CN).toString()); + variantRdCn.put(sampleName, rdCn); } currentCopyNumbers.put(variantId, variantRdCn); } public void thirdPassApply(final VariantContext variant) { - VariantContextBuilder builder = new VariantContextBuilder(variant); + final VariantContextBuilder builder = new VariantContextBuilder(variant); if (revisedEventsAll.containsKey(variant.getID())) { processRevisedEvent(builder, variant); } @@ -201,29 +203,29 @@ private void processOverlap(final VariantContext v1, final VariantContext v2) { } else { return; } - String widerID = wider.getID(); - String narrowerID = narrower.getID(); + final String widerID = wider.getID(); + final String narrowerID = narrower.getID(); // Skip processing if same variant ID, SV type or samples - String widerSvType = wider.getAttributeAsString(GATKSVVCFConstants.SVTYPE, ""); - String narrowerSvType = narrower.getAttributeAsString(GATKSVVCFConstants.SVTYPE, ""); - Set widerSamples = getNonReferenceSamples(wider); - Set narrowerSamples = getNonReferenceSamples(narrower); + final String widerSvType = wider.getAttributeAsString(GATKSVVCFConstants.SVTYPE, ""); + final String narrowerSvType = narrower.getAttributeAsString(GATKSVVCFConstants.SVTYPE, ""); + final Set widerSamples = getNonReferenceSamples(wider); + final Set narrowerSamples = getNonReferenceSamples(narrower); if (widerID.equals(narrowerID) || widerSvType.equals(narrowerSvType) || widerSamples.equals(narrowerSamples)) { return; } // Get samples present in wider but not in narrower - Set nonCommonSamples = new HashSet<>(widerSamples); + final Set nonCommonSamples = new HashSet<>(widerSamples); nonCommonSamples.removeAll(narrowerSamples); if (nonCommonSamples.isEmpty()) { return; } // Revise variant if coverage exceeds threshold - double coverage = getCoverage(wider, narrower); + final double coverage = getCoverage(wider, narrower); if (coverage >= 0.5) { - for (String sample : nonCommonSamples) { + for (final String sample : nonCommonSamples) { revisedEventsAll.computeIfAbsent(narrowerID, k -> new HashMap<>()) .put(sample, new ImmutablePair<>(widerID, widerSvType)); } @@ -257,7 +259,10 @@ private void processRevisedEvent(final VariantContextBuilder builder, final Vari if (newVal != -1) { final GenotypeBuilder gb = new GenotypeBuilder(oldGenotype); gb.alleles(Arrays.asList(variant.getReference(), variant.getAlternateAllele(0))); - gb.GQ(Integer.parseInt((String) oldGenotype.getExtendedAttribute(GATKSVVCFConstants.RD_GQ))); +// if (!oldGenotype.hasExtendedAttribute(GATKSVVCFConstants.RD_CN)) continue; + + final int rdCn = Integer.parseInt(oldGenotype.getExtendedAttribute(GATKSVVCFConstants.RD_CN).toString()); + gb.GQ(rdCn); newGenotypes.add(gb.make()); } else { newGenotypes.add(oldGenotype); @@ -273,8 +278,9 @@ private void processCnvs(final VariantContextBuilder builder, final VariantConte final boolean isDel = variant.getAttributeAsString(GATKSVVCFConstants.SVTYPE, "").equals(GATKSVVCFConstants.SYMB_ALT_STRING_DEL); for (String sample : variant.getSampleNamesOrderedByName()) { final Genotype genotype = variant.getGenotype(sample); - final String rdCnString = (String) genotype.getExtendedAttribute(GATKSVVCFConstants.RD_CN); - final int rdCn = Integer.parseInt(rdCnString); + if (!genotype.hasExtendedAttribute(GATKSVVCFConstants.RD_CN)) continue; + + final int rdCn = Integer.parseInt(genotype.getExtendedAttribute(GATKSVVCFConstants.RD_CN).toString()); if ((isDel && rdCn > 3) || (!isDel && (rdCn < 1 || rdCn > 4))) { builder.attribute(GATKSVVCFConstants.MULTI_CNV, true); break; @@ -283,12 +289,12 @@ private void processCnvs(final VariantContextBuilder builder, final VariantConte } private boolean isDelDup(final VariantContext variant) { - String svType = variant.getAttributeAsString(GATKSVVCFConstants.SVTYPE, ""); + final String svType = variant.getAttributeAsString(GATKSVVCFConstants.SVTYPE, ""); return svType.equals(GATKSVVCFConstants.SYMB_ALT_STRING_DEL) || svType.equals(GATKSVVCFConstants.SYMB_ALT_STRING_DUP); } private boolean isLarge(final VariantContext variant, final int minSize) { - int variantLength = Math.abs(variant.getAttributeAsInt(GATKSVVCFConstants.SVLEN, 0)); + final int variantLength = Math.abs(variant.getAttributeAsInt(GATKSVVCFConstants.SVLEN, 0)); return variantLength >= minSize; } @@ -299,9 +305,9 @@ private boolean overlaps(final VariantContext v1, final VariantContext v2) { } private Set getNonReferenceSamples(final VariantContext variant) { - Set samples = new HashSet<>(); - for (String sampleName : variant.getSampleNames()) { - Genotype genotype = variant.getGenotype(sampleName); + final Set samples = new HashSet<>(); + for (final String sampleName : variant.getSampleNames()) { + final Genotype genotype = variant.getGenotype(sampleName); if (genotype.isCalled() && !genotype.isHomRef()) { samples.add(sampleName); } @@ -310,13 +316,13 @@ private Set getNonReferenceSamples(final VariantContext variant) { } private double getCoverage(final VariantContext wider, final VariantContext narrower) { - int nStart = narrower.getStart(); - int nStop = narrower.getEnd(); - int wStart = wider.getStart(); - int wStop = wider.getEnd(); + final int nStart = narrower.getStart(); + final int nStop = narrower.getEnd(); + final int wStart = wider.getStart(); + final int wStop = wider.getEnd(); if (wStart <= nStop && nStart <= wStop) { - int intersectionSize = Math.min(nStop, wStop) - Math.max(nStart, wStart) + 1; + final int intersectionSize = Math.min(nStop, wStop) - Math.max(nStart, wStart) + 1; return (double) intersectionSize / (nStop - nStart + 1); } return 0.0;