-
Notifications
You must be signed in to change notification settings - Fork 72
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Filter wham-only DELs and scramble-only SVAs in CleanVcf & docs updates #740
Changes from 6 commits
424c103
b13f6b0
84c5f85
151b591
e49c643
f408d6d
a4b964f
c878e59
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
{ | ||
"VisualizeCnvs.vcf_or_bed": "${this.filtered_vcf}", | ||
"VisualizeCnvs.prefix": "${this.sample_set_set_id}", | ||
"VisualizeCnvs.median_files": "${this.sample_sets.median_cov}", | ||
"VisualizeCnvs.rd_files": "${this.sample_sets.merged_bincov}", | ||
"VisualizeCnvs.ped_file": "${workspace.cohort_ped_file}", | ||
"VisualizeCnvs.min_size": 50000, | ||
"VisualizeCnvs.flags": "-s 999999999", | ||
"VisualizeCnvs.sv_pipeline_docker": "${workspace.sv_pipeline_docker}" | ||
} |
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -53,6 +53,7 @@ workflow CleanVcfChromosome { | |||||
RuntimeAttr? runtime_override_stitch_fragmented_cnvs | ||||||
RuntimeAttr? runtime_override_final_cleanup | ||||||
RuntimeAttr? runtime_override_rescue_me_dels | ||||||
RuntimeAttr? runtime_attr_add_high_fp_rate_filters | ||||||
|
||||||
# Clean vcf 1b | ||||||
RuntimeAttr? runtime_attr_override_subset_large_cnvs_1b | ||||||
|
@@ -299,9 +300,17 @@ workflow CleanVcfChromosome { | |||||
runtime_attr_override = runtime_override_rescue_me_dels | ||||||
} | ||||||
|
||||||
call FinalCleanup { | ||||||
call AddHighFPRateFilters { | ||||||
input: | ||||||
vcf=RescueMobileElementDeletions.out, | ||||||
prefix="~{prefix}.high_fp_filtered", | ||||||
sv_pipeline_docker=sv_pipeline_docker, | ||||||
runtime_attr_override=runtime_attr_add_high_fp_rate_filters | ||||||
} | ||||||
|
||||||
call FinalCleanup { | ||||||
input: | ||||||
vcf=AddHighFPRateFilters.out, | ||||||
contig=contig, | ||||||
prefix="~{prefix}.final_cleanup", | ||||||
sv_pipeline_docker=sv_pipeline_docker, | ||||||
|
@@ -799,6 +808,60 @@ task StitchFragmentedCnvs { | |||||
} | ||||||
} | ||||||
|
||||||
# Add FILTER status for pockets of variants with high FP rate: wham-only DELs and Scramble-only SVAs with HIGH_SR_BACKGROUND | ||||||
task AddHighFPRateFilters { | ||||||
input { | ||||||
File vcf | ||||||
String prefix | ||||||
String sv_pipeline_docker | ||||||
RuntimeAttr? runtime_attr_override | ||||||
} | ||||||
|
||||||
Float input_size = size(vcf, "GiB") | ||||||
RuntimeAttr runtime_default = object { | ||||||
mem_gb: 3.75 + input_size * 1.5, | ||||||
disk_gb: ceil(100.0 + input_size * 3.0), | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
cpu_cores: 1, | ||||||
preemptible_tries: 3, | ||||||
max_retries: 1, | ||||||
boot_disk_gb: 10 | ||||||
} | ||||||
RuntimeAttr runtime_override = select_first([runtime_attr_override, runtime_default]) | ||||||
runtime { | ||||||
memory: "~{select_first([runtime_override.mem_gb, runtime_default.mem_gb])} GB" | ||||||
disks: "local-disk ~{select_first([runtime_override.disk_gb, runtime_default.disk_gb])} HDD" | ||||||
cpu: select_first([runtime_override.cpu_cores, runtime_default.cpu_cores]) | ||||||
preemptible: select_first([runtime_override.preemptible_tries, runtime_default.preemptible_tries]) | ||||||
maxRetries: select_first([runtime_override.max_retries, runtime_default.max_retries]) | ||||||
docker: sv_pipeline_docker | ||||||
bootDiskSizeGb: select_first([runtime_override.boot_disk_gb, runtime_default.boot_disk_gb]) | ||||||
} | ||||||
|
||||||
command <<< | ||||||
set -euo pipefail | ||||||
|
||||||
python <<CODE | ||||||
import pysam | ||||||
fin = pysam.VariantFile("~{vcf}") | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
header = fin.header | ||||||
header.add_line("##FILTER=<ID=HIGH_ALGORITHM_FP_RATE,Description=\"Categories of variants with low specificity including Wham-only deletions and certain Scramble SVAs\">") | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I realize I probably suggested this name, but FDR (false discovery rate) is more appropriate. |
||||||
fo = pysam.VariantFile("~{prefix}.vcf.gz", 'w', header=header) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
for record in fin: | ||||||
if (record.info['ALGORITHMS'] == ('wham',) and record.info['SVTYPE'] == 'DEL') or \ | ||||||
(record.info['ALGORITHMS'] == ('scramble',) and record.info['HIGH_SR_BACKGROUND'] and record.alts == ('<INS:ME:SVA>',)): | ||||||
record.filter.add('HIGH_ALGORITHM_FP_RATE') | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
fo.write(record) | ||||||
fin.close() | ||||||
fo.close() | ||||||
CODE | ||||||
>>> | ||||||
|
||||||
output { | ||||||
File out = "~{prefix}.vcf.gz" | ||||||
} | ||||||
} | ||||||
|
||||||
|
||||||
|
||||||
# Final VCF cleanup | ||||||
task FinalCleanup { | ||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Was this copy+paste from somewhere? 3.75 should be plenty I think.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, most of the things you pointed out were from copy/paste; I'll do a better job of cleanup next time. Thanks for catching that.