-
Notifications
You must be signed in to change notification settings - Fork 597
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add malaria spanning deletion exception regression test with fix (#8802)
* Add malaria spanning deletion exception regression test with fix * Disabling codecov. --------- Co-authored-by: Jonn Smith <[email protected]>
- Loading branch information
1 parent
aed8b1b
commit ec39c37
Showing
5 changed files
with
113 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
70 changes: 70 additions & 0 deletions
70
...itute/hellbender/tools/walkers/variantutils/ReblockGVCF/reblock_cleanup_bug_variant.g.vcf
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
##fileformat=VCFv4.2 | ||
##ALT=<ID=NON_REF,Description="Represents any possible alternative allele not already represented at this location by REF and ALT"> | ||
##FILTER=<ID=LowQual,Description="Low quality"> | ||
##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed"> | ||
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)"> | ||
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality"> | ||
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> | ||
##FORMAT=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum DP observed within the GVCF block"> | ||
##FORMAT=<ID=PGT,Number=1,Type=String,Description="Physical phasing haplotype information, describing how the alternate alleles are phased in relation to one another; will always be heterozygous and is not intended to describe called alleles"> | ||
##FORMAT=<ID=PID,Number=1,Type=String,Description="Physical phasing ID information, where each unique ID within a given sample (but not across samples) connects records within a phasing group"> | ||
##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification"> | ||
##FORMAT=<ID=PS,Number=1,Type=Integer,Description="Phasing set (typically the position of the first variant in the set)"> | ||
##FORMAT=<ID=SB,Number=4,Type=Integer,Description="Per-sample component statistics which comprise the Fisher's Exact Test to detect strand bias."> | ||
##GATKCommandLine=<ID=HaplotypeCaller,CommandLine="HaplotypeCaller --annotate-with-num-discovered-alleles true --heterozygosity 0.001 --indel-heterozygosity 1.25E-4 --heterozygosity-stdev 0.01 --sample-ploidy 2 --contamination-fraction-to-filter 0.0 --gvcf-gq-bands 10 --gvcf-gq-bands 20 --gvcf-gq-bands 30 --gvcf-gq-bands 40 --gvcf-gq-bands 50 --gvcf-gq-bands 60 --gvcf-gq-bands 70 --gvcf-gq-bands 80 --gvcf-gq-bands 90 --linked-de-bruijn-graph true --pileup-detection true --pileup-detection-enable-indel-pileup-calling true --bam-output SEN_2012_SLP.CXX_0108.haplotype_caller.Pf3D7_11_v3.bamout.bam --smith-waterman FASTEST_AVAILABLE --emit-ref-confidence GVCF --output SEN_2012_SLP.CXX_0108.haplotype_caller.Pf3D7_11_v3.g.vcf.gz --intervals Pf3D7_11_v3 --input gs://fc-b06e896e-cc1d-4deb-b638-f7b87c3e5dbd/results/SRFlowcell/SEN_2012_SLP.CXX_0108/reads/aligned/SEN_2012_SLP.CXX_0108.aligned.merged.markDuplicates.sorted.BQSR.bam --reference /cromwell_root/broad-dsde-methods-long-reads/resources/references/plasmodb_release-61/Pfalciparum3D7/fasta/data/PlasmoDB-61_Pfalciparum3D7_Genome.fasta --annotation AssemblyComplexity --annotation-group StandardAnnotation --annotation-group StandardHCAnnotation --use-posteriors-to-calculate-qual false --dont-use-dragstr-priors false --use-new-qual-calculator true --standard-min-confidence-threshold-for-calling 30.0 --max-alternate-alleles 6 --max-genotype-count 1024 --num-reference-samples-if-no-call 0 --genotype-assignment-method USE_PLS_TO_ASSIGN --output-mode EMIT_VARIANTS_ONLY --all-site-pls false --flow-likelihood-parallel-threads 0 --flow-likelihood-optimized-comp false --trim-to-haplotype true --exact-matching false --flow-use-t0-tag false --flow-probability-threshold 0.003 --flow-remove-non-single-base-pair-indels false --flow-remove-one-zero-probs false --flow-quantization-bins 121 --flow-fill-empty-bins-value 0.001 --flow-symmetric-indel-probs false --flow-report-insertion-or-deletion false --flow-disallow-probs-larger-than-call false --flow-lump-probs false --flow-retain-max-n-probs-base-format false --flow-probability-scaling-factor 10 --flow-order-cycle-length 4 --keep-boundary-flows false --floor-blocks false --indel-size-to-eliminate-in-ref-model 10 --disable-optimizations false --dragen-mode false --dragen-378-concordance-mode false --flow-mode NONE --apply-bqd false --apply-frd false --disable-spanning-event-genotyping false --transform-dragen-mapping-quality false --mapping-quality-threshold-for-genotyping 20 --max-effective-depth-adjustment-for-frd 0 --just-determine-active-regions false --dont-genotype false --do-not-run-physical-phasing false --do-not-correct-overlapping-quality false --use-filtered-reads-for-annotations false --use-flow-aligner-for-stepwise-hc-filtering false --adaptive-pruning false --do-not-recover-dangling-branches false --recover-dangling-heads false --kmer-size 10 --kmer-size 25 --dont-increase-kmer-sizes-for-cycles false --allow-non-unique-kmers-in-ref false --num-pruning-samples 1 --min-dangling-branch-length 4 --recover-all-dangling-branches false --max-num-haplotypes-in-population 128 --min-pruning 2 --adaptive-pruning-initial-error-rate 0.001 --pruning-lod-threshold 2.302585092994046 --pruning-seeding-lod-threshold 9.210340371976184 --max-unpruned-variants 100 --disable-artificial-haplotype-recovery false --enable-legacy-graph-cycle-detection false --debug-assembly false --debug-graph-transformations false --capture-assembly-failure-bam false --num-matching-bases-in-dangling-end-to-recover -1 --error-correction-log-odds -Infinity --error-correct-reads false --kmer-length-for-read-error-correction 25 --min-observations-for-kmer-to-be-solid 20 --likelihood-calculation-engine PairHMM --base-quality-score-threshold 18 --dragstr-het-hom-ratio 2 --dont-use-dragstr-pair-hmm-scores false --pair-hmm-gap-continuation-penalty 10 --expected-mismatch-rate-for-read-disqualification 0.02 --pair-hmm-implementation FASTEST_AVAILABLE --pcr-indel-model CONSERVATIVE --phred-scaled-global-read-mismapping-rate 45 --disable-symmetric-hmm-normalizing false --disable-cap-base-qualities-to-map-quality false --enable-dynamic-read-disqualification-for-genotyping false --dynamic-read-disqualification-threshold 1.0 --native-pair-hmm-threads 4 --native-pair-hmm-use-double-precision false --flow-hmm-engine-min-indel-adjust 6 --flow-hmm-engine-flat-insertion-penatly 45 --flow-hmm-engine-flat-deletion-penatly 45 --use-pdhmm false --use-pdhmm-overlap-optimization false --make-determined-haps-from-pd-code false --print-pileupcalling-status false --fallback-gga-if-pdhmm-fails true --pileup-detection-active-region-phred-threshold 0.0 --num-artificial-haplotypes-to-add-per-allele 5 --artifical-haplotype-filtering-kmer-size 10 --pileup-detection-snp-alt-threshold 0.1 --pileup-detection-indel-alt-threshold 0.1 --pileup-detection-absolute-alt-depth 0.0 --pileup-detection-snp-adjacent-to-assembled-indel-range 5 --pileup-detection-snp-basequality-filter 12 --pileup-detection-bad-read-tolerance 0.0 --pileup-detection-proper-pair-read-badness true --pileup-detection-edit-distance-read-badness-threshold 0.08 --pileup-detection-chimeric-read-badness true --pileup-detection-template-mean-badness-threshold 0.0 --pileup-detection-template-std-badness-threshold 0.0 --pileup-detection-filter-assembly-alt-bad-read-tolerance 0.0 --pileup-detection-edit-distance-read-badness-for-assembly-filtering-threshold 0.12 --bam-writer-type CALLED_HAPLOTYPES --dont-use-soft-clipped-bases false --override-fragment-softclip-check false --min-base-quality-score 10 --max-mnp-distance 0 --force-call-filtered-alleles false --reference-model-deletion-quality 30 --soft-clip-low-quality-ends false --allele-informative-reads-overlap-margin 2 --smith-waterman-dangling-end-match-value 25 --smith-waterman-dangling-end-mismatch-penalty -50 --smith-waterman-dangling-end-gap-open-penalty -110 --smith-waterman-dangling-end-gap-extend-penalty -6 --smith-waterman-haplotype-to-reference-match-value 200 --smith-waterman-haplotype-to-reference-mismatch-penalty -150 --smith-waterman-haplotype-to-reference-gap-open-penalty -260 --smith-waterman-haplotype-to-reference-gap-extend-penalty -11 --smith-waterman-read-to-haplotype-match-value 10 --smith-waterman-read-to-haplotype-mismatch-penalty -15 --smith-waterman-read-to-haplotype-gap-open-penalty -30 --smith-waterman-read-to-haplotype-gap-extend-penalty -5 --flow-assembly-collapse-hmer-size 0 --flow-assembly-collapse-partial-mode false --flow-filter-alleles false --flow-filter-alleles-qual-threshold 30.0 --flow-filter-alleles-sor-threshold 3.0 --flow-filter-lone-alleles false --flow-filter-alleles-debug-graphs false --min-assembly-region-size 50 --max-assembly-region-size 300 --active-probability-threshold 0.002 --max-prob-propagation-distance 50 --force-active false --assembly-region-padding 100 --padding-around-indels 75 --padding-around-snps 20 --padding-around-strs 75 --max-extension-into-assembly-region-padding-legacy 25 --max-reads-per-alignment-start 50 --enable-legacy-assembly-region-trimming false --interval-set-rule UNION --interval-padding 0 --interval-exclusion-padding 0 --interval-merging-rule ALL --read-validation-stringency SILENT --seconds-between-progress-updates 10.0 --disable-sequence-dictionary-validation false --create-output-bam-index true --create-output-bam-md5 false --create-output-variant-index true --create-output-variant-md5 false --max-variants-per-shard 0 --lenient false --add-output-sam-program-record true --add-output-vcf-command-line true --cloud-prefetch-buffer 40 --cloud-index-prefetch-buffer -1 --disable-bam-index-caching false --sites-only-vcf-output false --help false --version false --showHidden false --verbosity INFO --QUIET false --use-jdk-deflater false --use-jdk-inflater false --gcs-max-retries 20 --gcs-project-for-requester-pays --disable-tool-default-read-filters false --minimum-mapping-quality 20 --disable-tool-default-annotations false --enable-all-annotations false --allow-old-rms-mapping-quality-annotation-data false --assembly-complexity-reference-mode false",Version="4.5.0.0",Date="April 23, 2024 at 6:39:27 PM GMT"> | ||
##GVCFBlock0-10=minGQ=0(inclusive),maxGQ=10(exclusive) | ||
##GVCFBlock10-20=minGQ=10(inclusive),maxGQ=20(exclusive) | ||
##GVCFBlock20-30=minGQ=20(inclusive),maxGQ=30(exclusive) | ||
##GVCFBlock30-40=minGQ=30(inclusive),maxGQ=40(exclusive) | ||
##GVCFBlock40-50=minGQ=40(inclusive),maxGQ=50(exclusive) | ||
##GVCFBlock50-60=minGQ=50(inclusive),maxGQ=60(exclusive) | ||
##GVCFBlock60-70=minGQ=60(inclusive),maxGQ=70(exclusive) | ||
##GVCFBlock70-80=minGQ=70(inclusive),maxGQ=80(exclusive) | ||
##GVCFBlock80-90=minGQ=80(inclusive),maxGQ=90(exclusive) | ||
##GVCFBlock90-100=minGQ=90(inclusive),maxGQ=100(exclusive) | ||
##INFO=<ID=BaseQRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt Vs. Ref base qualities"> | ||
##INFO=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth; some reads may have been filtered"> | ||
##INFO=<ID=END,Number=1,Type=Integer,Description="Stop position of the interval"> | ||
##INFO=<ID=ExcessHet,Number=1,Type=Float,Description="Phred-scaled p-value for exact test of excess heterozygosity"> | ||
##INFO=<ID=HAPCOMP,Number=A,Type=Integer,Description="Edit distances of each alt allele's most common supporting haplotype from closest germline haplotype, excluding differences at the site in question."> | ||
##INFO=<ID=HAPDOM,Number=A,Type=Float,Description="For each alt allele, fraction of read support that best fits the most-supported haplotype containing the allele"> | ||
##INFO=<ID=HEC,Number=.,Type=Integer,Description="Counts of support for haplotype groups excluding difference at the site in question."> | ||
##INFO=<ID=InbreedingCoeff,Number=1,Type=Float,Description="Inbreeding coefficient as estimated from the genotype likelihoods per-sample when compared against the Hardy-Weinberg expectation"> | ||
##INFO=<ID=MLEAC,Number=A,Type=Integer,Description="Maximum likelihood expectation (MLE) for the allele counts (not necessarily the same as the AC), for each ALT allele, in the same order as listed"> | ||
##INFO=<ID=MLEAF,Number=A,Type=Float,Description="Maximum likelihood expectation (MLE) for the allele frequency (not necessarily the same as the AF), for each ALT allele, in the same order as listed"> | ||
##INFO=<ID=MQRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref read mapping qualities"> | ||
##INFO=<ID=NDA,Number=1,Type=Integer,Description="Number of alternate alleles discovered (but not necessarily genotyped) at this site"> | ||
##INFO=<ID=RAW_MQandDP,Number=2,Type=Integer,Description="Raw data (sum of squared MQ and total depth) for improved RMS Mapping Quality calculation. Incompatible with deprecated RAW_MQ formulation."> | ||
##INFO=<ID=ReadPosRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt vs. Ref read position bias"> | ||
##contig=<ID=Pf3D7_01_v3,length=640851> | ||
##contig=<ID=Pf3D7_02_v3,length=947102> | ||
##contig=<ID=Pf3D7_03_v3,length=1067971> | ||
##contig=<ID=Pf3D7_04_v3,length=1200490> | ||
##contig=<ID=Pf3D7_05_v3,length=1343557> | ||
##contig=<ID=Pf3D7_06_v3,length=1418242> | ||
##contig=<ID=Pf3D7_07_v3,length=1445207> | ||
##contig=<ID=Pf3D7_08_v3,length=1472805> | ||
##contig=<ID=Pf3D7_09_v3,length=1541735> | ||
##contig=<ID=Pf3D7_10_v3,length=1687656> | ||
##contig=<ID=Pf3D7_11_v3,length=2038340> | ||
##contig=<ID=Pf3D7_12_v3,length=2271494> | ||
##contig=<ID=Pf3D7_13_v3,length=2925236> | ||
##contig=<ID=Pf3D7_14_v3,length=3291936> | ||
##contig=<ID=Pf3D7_API_v3,length=34250> | ||
##contig=<ID=Pf3D7_MIT_v3,length=5967> | ||
##source=HaplotypeCaller | ||
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SENTh086.12-073.18_SEN.Di | ||
Pf3D7_12_v3 646914 . TATA *,T,<NON_REF> 0.01 . HAPCOMP=0,0,0;HAPDOM=0.00,0.800,0.00;HEC=7,4,1,0,0,0,0,0,0;MLEAC=0,0,0;MLEAF=NaN,NaN,NaN;NDA=3 GT:PL ./.:0,0,0,0,0,0,0,0,0,0 | ||
Pf3D7_12_v3 646918 . T <NON_REF> . . END=646919 GT:DP:GQ:MIN_DP:PL 0/0:0:0:0:0,0,0 | ||
Pf3D7_12_v3 646920 . TTTTTTTTTTTTTC *,T,<NON_REF> 0 . DP=5;ExcessHet=0.0000;HAPCOMP=0,1,0;HAPDOM=0.00,1.00,0.00;HEC=7,4,1,0,0,0,0,0,0;MLEAC=0,0,0;MLEAF=0.00,0.00,0.00;NDA=3;RAW_MQandDP=18000,5 GT:AD:DP:GQ:PL:SB 0/0:4,0,0,0:4:12:0,15,123,12,114,180,15,124,152,151:2,2,0,0 | ||
Pf3D7_12_v3 646923 . T A,*,<NON_REF> 0.01 . DP=4;ExcessHet=0.0000;HAPCOMP=0,0,0;HAPDOM=0.111,0.00,0.00;HEC=7,4,1,0,0,0;MLEAC=0,0,0;MLEAF=0.00,0.00,0.00;NDA=3;RAW_MQandDP=14400,4 GT:AD:DP:GQ:PL:SB 0/0:0,0,0,0:0:0:0,0,1,12,13,99,11,12,61,50:0,0,0,0 | ||
Pf3D7_12_v3 646924 . T <NON_REF> . . END=646925 GT:DP:GQ:MIN_DP:PL 0/0:4:12:4:0,12,99 | ||
Pf3D7_12_v3 646926 . TTTTTTTC *,T,<NON_REF> 0 . DP=5;ExcessHet=0.0000;HAPCOMP=0,0,0;HAPDOM=0.00,1.00,0.00;HEC=7,4,1,0,0,0,0,0,0;MLEAC=0,0,0;MLEAF=0.00,0.00,0.00;NDA=3;RAW_MQandDP=18000,5 GT:AD:DP:GQ:PL:SB 0/0:0,0,0,0:0:0:0,12,180,0,12,0,12,102,12,90:0,0,0,0 | ||
Pf3D7_12_v3 646927 . TTTTTTC T,*,<NON_REF> 0.01 . DP=5;ExcessHet=0.0000;HAPCOMP=1,0,0;HAPDOM=0.111,0.00,0.00;HEC=7,4,1,0,0,0,0;MLEAC=0,0,0;MLEAF=0.00,0.00,0.00;NDA=3;RAW_MQandDP=18000,5 GT:AD:DP:GQ:PL:SB 1/2:0,0,0,0:0:0:1,1,1,1,0,0,1,1,1,1:0,0,0,0 | ||
Pf3D7_12_v3 646933 . C *,T,<NON_REF> 3.15 . DP=5;ExcessHet=0.0000;HAPCOMP=0,1,0;HAPDOM=0.00,0.111,0.00;HEC=7,4,1,0,0,0,0;MLEAC=1,1,0;MLEAF=0.500,0.500,0.00;NDA=3;RAW_MQandDP=18000,5 GT:AD:DP:GQ:PL:SB 1/1:0,0,0,0:0:1:123,15,0,16,1,1,76,14,15,62:0,0,0,0 | ||
Pf3D7_12_v3 646934 . T <NON_REF> . . END=646947 GT:DP:GQ:MIN_DP:PL 0/0:7:15:5:0,15,124 | ||
Pf3D7_12_v3 646948 . C <NON_REF> . . END=646948 GT:DP:GQ:MIN_DP:PL 0/0:3:9:3:0,9,74 | ||
Pf3D7_12_v3 646949 . A <NON_REF> . . END=646953 GT:DP:GQ:MIN_DP:PL 0/0:5:15:5:0,15,124 | ||
Pf3D7_12_v3 646954 . G <NON_REF> . . END=646955 GT:DP:GQ:MIN_DP:PL 0/0:7:21:7:0,21,163 | ||
Pf3D7_12_v3 646956 . C <NON_REF> . . END=646956 GT:DP:GQ:MIN_DP:PL 0/0:8:10:8:0,10,159 | ||
Pf3D7_12_v3 646957 . A <NON_REF> . . END=646991 GT:DP:GQ:MIN_DP:PL 0/0:8:21:7:0,21,163 |
Binary file added
BIN
+206 Bytes
...e/hellbender/tools/walkers/variantutils/ReblockGVCF/reblock_cleanup_bug_variant.g.vcf.idx
Binary file not shown.