Skip to content

Commit

Permalink
Added sex revisions for male GT
Browse files Browse the repository at this point in the history
  • Loading branch information
kjaisingh committed Jan 13, 2025
1 parent 0ab56eb commit 8fbd27e
Show file tree
Hide file tree
Showing 6 changed files with 130 additions and 132 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -165,35 +165,30 @@ public enum ComplexVariantSubtype {
public static final String LOW_QS_SCORE_FILTER_KEY = "LOW_QS";
public static final String FREQUENCY_FILTER_KEY = "FREQ";

// SVCleanPt1a
public static final String EV = "EV";
public static final List<String> EV_VALUES = Arrays.asList(
null, "RD", "PE", "RD,PE", "SR", "RD,SR", "PE,SR", "RD,PE,SR"
);
// CleanVcf
public static final String ME = "ME";
public static final String VAR_GQ = "varGQ";
public static final String MULTIALLELIC = "MULTIALLELIC";
public static final String UNRESOLVED = "UNRESOLVED";
public static final String HIGH_SR_BACKGROUND = "HIGH_SR_BACKGROUND";
public static final String BOTHSIDES_SUPPORT = "BOTHSIDES_SUPPORT";
public static final String PESR_GT_OVERDISPERSION = "PESR_GT_OVERDISPERSION";
public static final String REVISED_EVENT = "REVISED_EVENT";
public static final String RD_CN = "RD_CN";

// SVCleanPt1b
public static final String RD_GQ = "RD_GQ";
public static final String MULTI_CNV = "MULTI_CNV";

// SVCleanPt4
public static final String PESR_GT_OVERDISPERSION = "PESR_GT_OVERDISPERSION";
public static final String RD_CN = "RD_CN";
public static final String RD_GQ = "RD_GQ";
public static final String PE_GT = "PE_GT";
public static final String SR_GT = "SR_GT";
public static final String PE_GQ = "PE_GQ";
public static final String SR_GQ = "SR_GQ";
public static final String CNV = "CNV";

// SVCleanPt5
public static final String UNR = "UNR";
public static final String EVENT = "EVENT";
public static final String EV = "EV";
public static final List<String> EV_VALUES = Arrays.asList(
null, "RD", "PE", "RD,PE", "SR", "RD,SR", "PE,SR", "RD,PE,SR"
);
public static final Set<String> FILTER_VCF_LINES = new HashSet<>(Arrays.asList(
"CIPOS", "CIEND", "RMSSTD", "source", "bcftools", "GATKCommandLine", "#CHROM"
));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,18 +81,19 @@ public void closeTool() {
}

@Override
public void apply(final VariantContext variant, final ReadsContext readsContext, final ReferenceContext referenceContext, final FeatureContext featureContext) {
VariantContextBuilder builder = new VariantContextBuilder(variant);
if (!variant.getAttributeAsBoolean(GATKSVVCFConstants.REVISED_EVENT, false)) {
public void apply(final VariantContext variant, final ReadsContext readsContext,
final ReferenceContext referenceContext, final FeatureContext featureContext) {
final VariantContextBuilder builder = new VariantContextBuilder(variant);
if (variant.getAttributeAsBoolean(GATKSVVCFConstants.REVISED_EVENT, false)) {
processRevisedSex(variant, builder);
}
vcfWriter.add(builder.make());
}

private void processRevisedSex(final VariantContext variant, final VariantContextBuilder builder) {
List<Genotype> genotypes = variant.getGenotypes();
List<Genotype> updatedGenotypes = new ArrayList<>(genotypes.size());
for (Genotype genotype : genotypes) {
final List<Genotype> genotypes = variant.getGenotypes();
final List<Genotype> updatedGenotypes = new ArrayList<>(genotypes.size());
for (final Genotype genotype : genotypes) {
if (Integer.parseInt(genotype.getExtendedAttribute(GATKSVVCFConstants.RD_CN, 0).toString()) > 0) {
int newRdCn = Integer.parseInt(genotype.getExtendedAttribute(GATKSVVCFConstants.RD_CN).toString()) - 1;
GenotypeBuilder gb = new GenotypeBuilder(genotype);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ public void closeTool() {
@Override
public void apply(final VariantContext variant, final ReadsContext readsContext, final ReferenceContext referenceContext, final FeatureContext featureContext) {
// Initialize data structures
VariantContextBuilder builder = new VariantContextBuilder(variant);
final VariantContextBuilder builder = new VariantContextBuilder(variant);
List<Genotype> genotypes = variant.getGenotypes();

// Process variants
Expand All @@ -152,9 +152,9 @@ public void apply(final VariantContext variant, final ReadsContext readsContext,
private void processMultiallelic(final VariantContextBuilder builder, final List<Genotype> genotypes) {
int numGtOver2 = 0;
for (Genotype genotype : genotypes) {
Integer peGt = genotype.hasExtendedAttribute(GATKSVVCFConstants.PE_GT) ?
final Integer peGt = genotype.hasExtendedAttribute(GATKSVVCFConstants.PE_GT) ?
Integer.parseInt(genotype.getExtendedAttribute(GATKSVVCFConstants.PE_GT).toString()) : null;
Integer srGt = genotype.hasExtendedAttribute(GATKSVVCFConstants.SR_GT) ?
final Integer srGt = genotype.hasExtendedAttribute(GATKSVVCFConstants.SR_GT) ?
Integer.parseInt(genotype.getExtendedAttribute(GATKSVVCFConstants.SR_GT).toString()) : null;
int gt;
if (peGt == null) {
Expand All @@ -166,9 +166,9 @@ private void processMultiallelic(final VariantContextBuilder builder, final List
} else if (peGt == 0) {
gt = srGt;
} else {
Integer peGq = genotype.hasExtendedAttribute(GATKSVVCFConstants.PE_GQ) ?
final Integer peGq = genotype.hasExtendedAttribute(GATKSVVCFConstants.PE_GQ) ?
Integer.parseInt(genotype.getExtendedAttribute(GATKSVVCFConstants.PE_GQ).toString()) : null;
Integer srGq = genotype.hasExtendedAttribute(GATKSVVCFConstants.SR_GQ) ?
final Integer srGq = genotype.hasExtendedAttribute(GATKSVVCFConstants.SR_GQ) ?
Integer.parseInt(genotype.getExtendedAttribute(GATKSVVCFConstants.SR_GQ).toString()) : null;
if (peGq != null && srGq != null && peGq >= srGq) {
gt = peGt;
Expand All @@ -177,7 +177,7 @@ private void processMultiallelic(final VariantContextBuilder builder, final List
}
}
if (gt > 2) {
numGtOver2++;
numGtOver2 += 1;
}
}
if (numGtOver2 > maxVF) {
Expand All @@ -204,7 +204,7 @@ private List<Genotype> processLargeDeletions(final VariantContext variant, final
}

boolean gt5kbFilter = false;
List<Integer> allowedAlleleIndices = Arrays.asList(-1, 0, 1);
final List<Integer> allowedAlleleIndices = Arrays.asList(-1, 0, 1);
if (genotypes.stream().anyMatch(g -> g.getAlleles().stream().anyMatch(a -> !allowedAlleleIndices.contains(variant.getAlleleIndex(a))))) {
gt5kbFilter = true;
} else if (variant.getAttributeAsInt(GATKSVVCFConstants.SVLEN, 0) >= MIN_MULTIALLELIC_EVENT_SIZE && !multiallelicFilter) {
Expand All @@ -213,8 +213,8 @@ private List<Genotype> processLargeDeletions(final VariantContext variant, final

List<Genotype> updatedGenotypes = new ArrayList<>(genotypes.size());
if (gt5kbFilter) {
for (Genotype genotype : genotypes) {
GenotypeBuilder gb = new GenotypeBuilder(genotype);
for (final Genotype genotype : genotypes) {
final GenotypeBuilder gb = new GenotypeBuilder(genotype);
if (!genotype.isNoCall()) {
if (genotype.hasGQ() && Integer.parseInt(genotype.getExtendedAttribute(GATKSVVCFConstants.RD_CN, 0).toString()) >= 2) {
gb.alleles(Arrays.asList(variant.getReference(), variant.getReference()));
Expand All @@ -231,7 +231,7 @@ private List<Genotype> processLargeDeletions(final VariantContext variant, final

updatedGenotypes = new ArrayList<>(genotypes.size());
if (multiallelicFilter) {
for (Genotype genotype : genotypes) {
for (final Genotype genotype : genotypes) {
GenotypeBuilder gb = new GenotypeBuilder(genotype);
gb.noGQ();
gb.alleles(Arrays.asList(Allele.NO_CALL));
Expand All @@ -257,7 +257,7 @@ private List<Genotype> processLargeDuplications(final VariantContext variant, fi
boolean multiallelicFilter = false;
if (variant.getAttributeAsInt(GATKSVVCFConstants.SVLEN, 0) >= MIN_LARGE_EVENT_SIZE) {
Map<String, Integer> sampleRdCn = new HashMap<>();
for (Genotype genotype : genotypes) {
for (final Genotype genotype : genotypes) {
if (!outlierSamples.contains(genotype.getSampleName()) && genotype.hasExtendedAttribute(GATKSVVCFConstants.RD_CN)) {
sampleRdCn.put(genotype.getSampleName(), Integer.parseInt(genotype.getExtendedAttribute(GATKSVVCFConstants.RD_CN).toString()));
}
Expand All @@ -273,7 +273,7 @@ private List<Genotype> processLargeDuplications(final VariantContext variant, fi
}

boolean gt5kbFilter = false;
List<Integer> allowedAlleleIndices = Arrays.asList(-1, 0, 1);
final List<Integer> allowedAlleleIndices = Arrays.asList(-1, 0, 1);
if (genotypes.stream().anyMatch(g -> g.getAlleles().stream().anyMatch(a -> !allowedAlleleIndices.contains(variant.getAlleleIndex(a))))) {
gt5kbFilter = true;
} else if (variant.getAttributeAsInt(GATKSVVCFConstants.SVLEN, 0) >= MIN_MULTIALLELIC_EVENT_SIZE && !multiallelicFilter) {
Expand All @@ -282,8 +282,8 @@ private List<Genotype> processLargeDuplications(final VariantContext variant, fi

List<Genotype> updatedGenotypes = new ArrayList<>(genotypes.size());
if (gt5kbFilter) {
for (Genotype genotype : genotypes) {
GenotypeBuilder gb = new GenotypeBuilder(genotype);
for (final Genotype genotype : genotypes) {
final GenotypeBuilder gb = new GenotypeBuilder(genotype);
if (!genotype.isNoCall()) {
if (genotype.hasGQ() && Integer.parseInt(genotype.getExtendedAttribute(GATKSVVCFConstants.RD_CN, 3).toString()) <= 2) {
gb.alleles(Arrays.asList(variant.getReference(), variant.getReference()));
Expand All @@ -300,8 +300,8 @@ private List<Genotype> processLargeDuplications(final VariantContext variant, fi

updatedGenotypes = new ArrayList<>(genotypes.size());
if (multiallelicFilter) {
for (Genotype genotype : genotypes) {
GenotypeBuilder gb = new GenotypeBuilder(genotype);
for (final Genotype genotype : genotypes) {
final GenotypeBuilder gb = new GenotypeBuilder(genotype);
gb.noGQ();
gb.alleles(Arrays.asList(Allele.NO_CALL));
gb.attribute(GATKSVVCFConstants.COPY_NUMBER_FORMAT, genotype.getExtendedAttribute(GATKSVVCFConstants.RD_CN));
Expand All @@ -319,16 +319,15 @@ private List<Genotype> processLargeDuplications(final VariantContext variant, fi
}

public boolean isCalled(final VariantContextBuilder builder, final List<Genotype> genotypes) {
for (Genotype genotype : genotypes) {
for (final Genotype genotype : genotypes) {
if (!isNoCallGt(genotype.getAlleles())) {
return true;
}
}

if (builder.getAttributes().getOrDefault(GATKSVVCFConstants.SVTYPE, "").equals(GATKSVVCFConstants.CNV)) {
for (Genotype genotype : genotypes) {
final int cn = Integer.parseInt(genotype.getExtendedAttribute(GATKSVVCFConstants.COPY_NUMBER_FORMAT, 2).toString());
if (cn != 2) {
for (final Genotype genotype : genotypes) {
if (Integer.parseInt(genotype.getExtendedAttribute(GATKSVVCFConstants.COPY_NUMBER_FORMAT, 2).toString()) != 2) {
return true;
}
}
Expand All @@ -337,7 +336,7 @@ public boolean isCalled(final VariantContextBuilder builder, final List<Genotype
return false;
}

private boolean isNoCallGt(List<Allele> alleles) {
private boolean isNoCallGt(final List<Allele> alleles) {
if (alleles.size() == 1 && alleles.get(0).isReference()) return true;
else if (alleles.size() == 2 && alleles.get(0).isReference() && alleles.get(1).isReference()) return true;
else if (alleles.size() == 1 && alleles.get(0).isNoCall()) return true;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,6 @@
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.vcf.VCFHeader;
import htsjdk.variant.vcf.VCFHeaderLineType;
import htsjdk.variant.vcf.VCFFormatHeaderLine;

import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.BetaFeature;
Expand Down Expand Up @@ -93,7 +90,8 @@ public void closeTool() {
}

@Override
protected void nthPassApply(final VariantContext variant, final ReadsContext readsContext, final ReferenceContext referenceContext, final FeatureContext featureContext, int n) {
protected void nthPassApply(final VariantContext variant, final ReadsContext readsContext,
final ReferenceContext referenceContext, final FeatureContext featureContext, int n) {
switch (n) {
case 0:
firstPassApply(variant);
Expand All @@ -111,7 +109,7 @@ public void firstPassApply(final VariantContext variant) {

overlappingVariantsBuffer.removeIf(vc -> !vc.getContig().equals(variant.getContig())
|| (vc.getStart() + vc.getAttributeAsInt(GATKSVVCFConstants.SVLEN, 0)) < variant.getStart());
for (VariantContext bufferedVariant : overlappingVariantsBuffer) {
for (final VariantContext bufferedVariant : overlappingVariantsBuffer) {
if (overlaps(bufferedVariant, variant)) {
processVariantPair(bufferedVariant, variant);
}
Expand All @@ -124,16 +122,16 @@ public void secondPassApply(final VariantContext variant) {
return;
}

VariantContextBuilder builder = new VariantContextBuilder(variant);
final VariantContextBuilder builder = new VariantContextBuilder(variant);
vcfWriter.add(builder.make());
}

private void processVariantPair(VariantContext v1, VariantContext v2) {
private void processVariantPair(final VariantContext v1, final VariantContext v2) {
// Determine larger variant
VariantContext largerVariant = v1;
VariantContext smallerVariant = v2;
int length1 = v1.getAttributeAsInt(GATKSVVCFConstants.SVLEN, 0);
int length2 = v2.getAttributeAsInt(GATKSVVCFConstants.SVLEN, 0);
final int length1 = v1.getAttributeAsInt(GATKSVVCFConstants.SVLEN, 0);
final int length2 = v2.getAttributeAsInt(GATKSVVCFConstants.SVLEN, 0);
int smallerLength = length2;

// Swap variants if necessary
Expand All @@ -148,14 +146,14 @@ private void processVariantPair(VariantContext v1, VariantContext v2) {
largerVariant.getStart() + largerVariant.getAttributeAsInt(GATKSVVCFConstants.SVLEN, 0),
smallerVariant.getStart() + smallerVariant.getAttributeAsInt(GATKSVVCFConstants.SVLEN, 0)
);
int maxStart = Math.max(largerVariant.getStart(), smallerVariant.getStart());
int overlapLength = minEnd - maxStart + 1;
final int maxStart = Math.max(largerVariant.getStart(), smallerVariant.getStart());
final int overlapLength = minEnd - maxStart + 1;
if (overlapLength <= 0) {
return;
}

// Filter variant based on conditions
double coverage = (double) overlapLength / smallerLength;
final double coverage = (double) overlapLength / smallerLength;
if (coverage > 0.5 && !filteredVariantIds.contains(largerVariant.getID())) {
filteredVariantIds.add(smallerVariant.getID());
}
Expand Down
Loading

0 comments on commit 8fbd27e

Please sign in to comment.