Skip to content

Commit

Permalink
Small fixes to Calculate Precision and Sensitivity WDL [VS-1168] (#8817)
Browse files — browse the repository at this point in the history
  • Loading branch information
rsasch authored May 7, 2024
1 parent b447d7f commit 614be8b
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 5 deletions.
1 change: 1 addition & 0 deletions .dockstore.yml
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,7 @@ workflows:
branches:
- master
- ah_var_store
- rsa_vs_1168_bge
tags:
- /.*/
- name: GvsQuickstartVcfIntegration
Expand Down
23 changes: 19 additions & 4 deletions scripts/variantstore/wdl/GvsCalculatePrecisionAndSensitivity.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ workflow GvsCalculatePrecisionAndSensitivity {
String dataset_name
String filter_set_name
File interval_list
File? vcf_eval_bed_file
Array[String] chromosomes = ["chr20"]
String project_id
Array[String] sample_names

Expand All @@ -32,6 +34,7 @@ workflow GvsCalculatePrecisionAndSensitivity {

parameter_meta {
call_set_identifier: "The name of the callset for which we are calculating precision and sensitivity."
chromosomes: "The chromosome(s) on which to analyze precision and sensitivity. The default value for this is `['chr20']`."
dataset_name: "The GVS BigQuery dataset name."
filter_set_name: "The filter_set_name used to generate the callset."
interval_list: "The intervals over which to calculate precision and sensitivity."
Expand All @@ -41,6 +44,7 @@ workflow GvsCalculatePrecisionAndSensitivity {
truth_vcf_indices: "A list of the VCF indices for the truth data VCFs supplied above."
truth_beds: "A list of the bed files for the truth data used for analyzing the samples in `sample_names`."
ref_fasta: "The cloud path for the reference fasta sequence."
vcf_eval_bed_file: "Optional bed file for EvaluateVcf; if passed, will be used instead of chromosomes."
}

String output_basename = call_set_identifier + "_PS"
Expand Down Expand Up @@ -132,7 +136,8 @@ workflow GvsCalculatePrecisionAndSensitivity {
truth_vcf = truth_vcfs[i],
truth_vcf_index = truth_vcf_indices[i],
truth_bed = truth_beds[i],
interval_list = interval_list,
vcf_eval_bed_file = vcf_eval_bed_file,
chromosomes = chromosomes,
output_basename = sample_name + "-bq_roc_filtered",
is_vqsr_lite = IsVQSRLite.is_vqsr_lite,
ref_fasta = ref_fasta,
Expand All @@ -146,7 +151,8 @@ workflow GvsCalculatePrecisionAndSensitivity {
truth_vcf = truth_vcfs[i],
truth_vcf_index = truth_vcf_indices[i],
truth_bed = truth_beds[i],
interval_list = interval_list,
vcf_eval_bed_file = vcf_eval_bed_file,
chromosomes = chromosomes,
all_records = true,
output_basename = sample_name + "-bq_all",
is_vqsr_lite = IsVQSRLite.is_vqsr_lite,
Expand Down Expand Up @@ -377,9 +383,10 @@ task EvaluateVcf {
File truth_vcf
File truth_vcf_index
File truth_bed
File? vcf_eval_bed_file
Array[String] chromosomes

Boolean all_records = false
File interval_list

File ref_fasta

Expand All @@ -396,14 +403,22 @@ task EvaluateVcf {
String max_score_field_tag = if (is_vqsr_lite == true) then 'MAX_CALIBRATION_SENSITIVITY' else 'MAX_AS_VQSLOD'

command <<<
chromosomes=( ~{sep=' ' chromosomes} )

echo "Creating .bed file to control which chromosomes should be evaluated."
for i in "${chromosomes[@]}"
do
echo "$i 0 300000000" >> chromosomes.to.eval.txt
done

# Prepend date, time and pwd to xtrace log entries.
PS4='\D{+%F %T} \w $ '
set -o errexit -o nounset -o pipefail -o xtrace

rtg format --output human_REF_SDF ~{ref_fasta}

rtg vcfeval \
--bed-regions ~{interval_list} \
--bed-regions ~{if defined(vcf_eval_bed_file) then vcf_eval_bed_file else "chromosomes.to.eval.txt"} \
~{if all_records then "--all-records" else ""} \
--roc-subset snp,indel \
--vcf-score-field=INFO.~{max_score_field_tag} \
Expand Down
2 changes: 1 addition & 1 deletion scripts/variantstore/wdl/GvsExtractCallset.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ workflow GvsExtractCallset {
Float y_bed_weight_scaling = 4
Boolean is_wgs = true
Boolean convert_filtered_genotypes_to_nocalls = false
Boolean write_cost_to_db = true
Int? maximum_alternate_alleles
}

Expand All @@ -72,7 +73,6 @@ workflow GvsExtractCallset {

Boolean emit_pls = false
Boolean emit_ads = true
Boolean write_cost_to_db = true

String intervals_file_extension = if (zero_pad_output_vcf_filenames) then '-~{output_file_base_name}.vcf.gz.interval_list' else '-scattered.interval_list'

Expand Down

0 comments on commit 614be8b

Please sign in to comment.