Skip to content

Commit

Permalink
VS-1336 - It's not a site FILTER (#8773) (#8850)
Browse files Browse the repository at this point in the history
* Change extract so that when we filter at the genotype level (with FT) the VCF header has the filter definition in the FORMAT field.
* Also minor renaming of ExtractCohort argument.
* Point to updated truth.
  • Loading branch information
gbggrant authored May 29, 2024
1 parent 3b8e4da commit 9a0172d
Show file tree
Hide file tree
Showing 4 changed files with 41 additions and 17 deletions.
2 changes: 1 addition & 1 deletion scripts/variantstore/wdl/GvsUtils.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ task GetToolVersions {
# there are a handful of tasks that require the larger GNU libc-based `slim`.
String cloud_sdk_slim_docker = "gcr.io/google.com/cloudsdktool/cloud-sdk:435.0.0-slim"
String variants_docker = "us-central1-docker.pkg.dev/broad-dsde-methods/gvs/variants:2024-05-06-alpine-778d8a77294d"
String gatk_docker = "us-central1-docker.pkg.dev/broad-dsde-methods/gvs/gatk:2024_05_24-gatkbase-cdc749be72ba"
String gatk_docker = "us-central1-docker.pkg.dev/broad-dsde-methods/gvs/gatk:2024_05_28-gatkbase-d303d1217d50"
String variants_nirvana_docker = "us.gcr.io/broad-dsde-methods/variantstore:nirvana_2022_10_19"
String real_time_genomics_docker = "docker.io/realtimegenomics/rtg-tools:latest"
String gotc_imputation_docker = "us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.5-1.10.2-0.1.16-1649948623"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,11 +123,20 @@ public static VCFHeaderLine getVqsLodHeader(Double vqsLodThreshold, String model
"Site failed " + model + " model VQSLOD cutoff of " + vqsLodThreshold.toString());
}

public static VCFHeaderLine getTruthSensitivityHeader(Double truthSensitivityThreshold, Double vqsLodThreshold, String model) {
/**
 * Builds the FILTER header line describing the truth-sensitivity cutoff for one model.
 *
 * @param truthSensitivityThreshold sensitivity cutoff to report; when {@code null}, a per-model default is used instead
 * @param vqsLodThreshold VQSLOD cutoff reported alongside the sensitivity; must not be {@code null} because it is dereferenced
 * @param model model name, appended to the failure prefix to form the filter ID and echoed in the description
 * @return a {@link VCFFilterHeaderLine} whose description starts with "Site failed"
 */
public static VCFHeaderLine getTruthSensitivityFilterHeader(Double truthSensitivityThreshold, Double vqsLodThreshold, String model) {
    Double sensitivityCutoff = truthSensitivityThreshold;
    if (sensitivityCutoff == null) {
        // Per the original note: reaching here with null means all vqsr threshold inputs were null, so use the defaults.
        if (GATKVCFConstants.SNP.contains(model)) {
            sensitivityCutoff = DEFAULT_TRUTH_SENSITIVITY_THRESHOLD_SNPS;
        } else {
            sensitivityCutoff = DEFAULT_TRUTH_SENSITIVITY_THRESHOLD_INDELS;
        }
    }

    final String filterId = GATKVCFConstants.VQSR_FAILURE_PREFIX + model;
    final String description = "Site failed " + model + " model sensitivity cutoff (" + sensitivityCutoff
            + "), corresponding with VQSLOD cutoff of " + vqsLodThreshold.toString();
    return new VCFFilterHeaderLine(filterId, description);
}

/**
 * Builds a plain key/value header line documenting the genotype-level (FT) filter value for one model.
 * Unlike a FILTER header line, this is constructed as a bare {@link VCFHeaderLine}.
 *
 * @param truthSensitivityThreshold sensitivity cutoff to report; when {@code null}, a per-model default is used instead
 * @param vqsLodThreshold VQSLOD cutoff reported alongside the sensitivity; must not be {@code null} because it is dereferenced
 * @param model model name, appended to the failure prefix to form the header key and echoed in the description
 * @return a {@link VCFHeaderLine} keyed by the failure prefix plus the model name
 */
public static VCFHeaderLine getTruthSensitivityHeader(Double truthSensitivityThreshold, Double vqsLodThreshold, String model) {
    Double sensitivityCutoff = truthSensitivityThreshold;
    if (sensitivityCutoff == null) {
        // Per the original note: reaching here with null means all vqsr threshold inputs were null, so use the defaults.
        if (GATKVCFConstants.SNP.contains(model)) {
            sensitivityCutoff = DEFAULT_TRUTH_SENSITIVITY_THRESHOLD_SNPS;
        } else {
            sensitivityCutoff = DEFAULT_TRUTH_SENSITIVITY_THRESHOLD_INDELS;
        }
    }

    final String headerKey = GATKVCFConstants.VQSR_FAILURE_PREFIX + model;
    final String headerValue = "Sample Genotype FT filter value indicating that the genotyped allele failed " + model
            + " model sensitivity cutoff (" + sensitivityCutoff
            + "), corresponding with VQSLOD cutoff of " + vqsLodThreshold.toString();
    return new VCFHeaderLine(headerKey, headerValue);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ public enum VQScoreFilteringType {GENOTYPE, SITES, NONE}

@Argument(
fullName = "vqs-score-filter-by-site",
doc = "If Variant Quality Score filtering is applied, it should be at a site level. Default is false",
doc = "If Variant Quality Score filtering (either VETS or VQSR) is applied, it should be at a site level. Default is false",
optional = true
)
// historical note that this parameter was previously named 'vqsr-score-filter-by-site', changed as it's not VQSR-specific
Expand Down Expand Up @@ -341,8 +341,15 @@ protected void onStartup() {
vqsLodSNPThreshold = FilterSensitivityTools.getVqslodThreshold(trancheMaps.get(GATKVCFConstants.SNP), truthSensitivitySNPThreshold, GATKVCFConstants.SNP);
vqsLodINDELThreshold = FilterSensitivityTools.getVqslodThreshold(trancheMaps.get(GATKVCFConstants.INDEL), truthSensitivityINDELThreshold, GATKVCFConstants.INDEL);
// set headers
extraHeaderLines.add(FilterSensitivityTools.getTruthSensitivityHeader(truthSensitivitySNPThreshold, vqsLodSNPThreshold, GATKVCFConstants.SNP));
extraHeaderLines.add(FilterSensitivityTools.getTruthSensitivityHeader(truthSensitivityINDELThreshold, vqsLodINDELThreshold, GATKVCFConstants.INDEL));

if (vqScoreFilteringType.equals(VQScoreFilteringType.SITES)) {
extraHeaderLines.add(FilterSensitivityTools.getTruthSensitivityFilterHeader(truthSensitivitySNPThreshold, vqsLodSNPThreshold, GATKVCFConstants.SNP));
extraHeaderLines.add(FilterSensitivityTools.getTruthSensitivityFilterHeader(truthSensitivityINDELThreshold, vqsLodINDELThreshold, GATKVCFConstants.INDEL));
}
else if (vqScoreFilteringType.equals(VQScoreFilteringType.GENOTYPE)) {
extraHeaderLines.add(FilterSensitivityTools.getTruthSensitivityHeader(truthSensitivitySNPThreshold, vqsLodSNPThreshold, GATKVCFConstants.SNP));
extraHeaderLines.add(FilterSensitivityTools.getTruthSensitivityHeader(truthSensitivityINDELThreshold, vqsLodINDELThreshold, GATKVCFConstants.INDEL));
}
}
} else {
extraHeaderLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.SCORE_KEY));
Expand All @@ -359,15 +366,23 @@ protected void onStartup() {
truthSensitivityINDELThreshold /= 100.0;
logger.info("Passing all INDEL variants with " + GATKVCFConstants.CALIBRATION_SENSITIVITY_KEY + " < " + truthSensitivityINDELThreshold);

extraHeaderLines.add(new VCFFilterHeaderLine(GATKVCFConstants.CALIBRATION_SENSITIVITY_FAILURE_SNP,
"Site failed SNP model calibration sensitivity cutoff (" + truthSensitivitySNPThreshold.toString() + ")"));
extraHeaderLines.add(new VCFFilterHeaderLine(GATKVCFConstants.CALIBRATION_SENSITIVITY_FAILURE_INDEL,
"Site failed INDEL model calibration sensitivity cutoff (" + truthSensitivityINDELThreshold.toString() + ")"));
if (vqScoreFilteringType.equals(VQScoreFilteringType.SITES)) {
extraHeaderLines.add(new VCFFilterHeaderLine(GATKVCFConstants.CALIBRATION_SENSITIVITY_FAILURE_SNP,
"Site failed SNP model calibration sensitivity cutoff (" + truthSensitivitySNPThreshold.toString() + ")"));
extraHeaderLines.add(new VCFFilterHeaderLine(GATKVCFConstants.CALIBRATION_SENSITIVITY_FAILURE_INDEL,
"Site failed INDEL model calibration sensitivity cutoff (" + truthSensitivityINDELThreshold.toString() + ")"));
}
else if (vqScoreFilteringType.equals(VQScoreFilteringType.GENOTYPE)) {
extraHeaderLines.add(new VCFHeaderLine(GATKVCFConstants.CALIBRATION_SENSITIVITY_FAILURE_SNP,
"Sample Genotype FT filter value indicating that the genotyped allele failed SNP model calibration sensitivity cutoff (" + truthSensitivitySNPThreshold.toString() + ")"));
extraHeaderLines.add(new VCFHeaderLine(GATKVCFConstants.CALIBRATION_SENSITIVITY_FAILURE_INDEL,
"Sample Genotype FT filter value indicating that the genotyped allele failed INDEL model calibration sensitivity cutoff (" + truthSensitivityINDELThreshold.toString() + ")"));
}
}
}

if (vqScoreFilteringType.equals(VQScoreFilteringType.GENOTYPE)) {
extraHeaderLines.add(new VCFFormatHeaderLine("FT", 1, VCFHeaderLineType.String, "Genotype Filter Field"));
extraHeaderLines.add(new VCFFormatHeaderLine("FT", 1, VCFHeaderLineType.String, "Sample Genotype Filter Field"));
}

if (emitPLs) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,12 @@
public class FilterSensitivityToolsTest {

// for testing inputs
private Double definedDoubleInput = 0.0;
private Double undefinedDoubleInput = null;
private String definedStringInput = "I'm defined!";
private String undefinedStringInput = null;
private final Double definedDoubleInput = 0.0;
private final Double undefinedDoubleInput = null;
private final String definedStringInput = "I'm defined!";
private final String undefinedStringInput = null;

private Map<Double, Double> testTrancheMap = new TreeMap<>();
private final Map<Double, Double> testTrancheMap = new TreeMap<>();

@BeforeMethod
public void setUp() {
Expand Down Expand Up @@ -211,7 +211,7 @@ public void testGetTruthSensitivityHeaderSNP() {
VCFFilterHeaderLine expectedHeader = new VCFFilterHeaderLine(GATKVCFConstants.VQSR_FAILURE_PREFIX + model,
"Site failed SNP model sensitivity cutoff (90.0), corresponding with VQSLOD cutoff of 0.0");

assertEquals(getTruthSensitivityHeader(truthSensitivityThreshold, vqsLodThreshold, GATKVCFConstants.SNP), expectedHeader);
assertEquals(getTruthSensitivityFilterHeader(truthSensitivityThreshold, vqsLodThreshold, GATKVCFConstants.SNP), expectedHeader);
}

@Test
Expand All @@ -222,7 +222,7 @@ public void testGetTruthSensitivityHeaderINDEL() {
VCFFilterHeaderLine expectedHeader = new VCFFilterHeaderLine(GATKVCFConstants.VQSR_FAILURE_PREFIX + model,
"Site failed INDEL model sensitivity cutoff (90.0), corresponding with VQSLOD cutoff of 0.0");

assertEquals(getTruthSensitivityHeader(truthSensitivityThreshold, vqsLodThreshold, GATKVCFConstants.INDEL), expectedHeader);
assertEquals(getTruthSensitivityFilterHeader(truthSensitivityThreshold, vqsLodThreshold, GATKVCFConstants.INDEL), expectedHeader);
}

}

0 comments on commit 9a0172d

Please sign in to comment.