diff --git a/.github/.dockstore.yml b/.github/.dockstore.yml index 3bcad4198..974fdeee6 100644 --- a/.github/.dockstore.yml +++ b/.github/.dockstore.yml @@ -4,7 +4,7 @@ workflows: name: GatherSampleEvidence primaryDescriptorPath: /wdl/GatherSampleEvidence.wdl filters: - branches: + branches: - main tags: - /.*/ @@ -13,7 +13,7 @@ workflows: name: EvidenceQC primaryDescriptorPath: /wdl/EvidenceQC.wdl filters: - branches: + branches: - main tags: - /.*/ @@ -22,7 +22,7 @@ workflows: name: TrainGCNV primaryDescriptorPath: /wdl/TrainGCNV.wdl filters: - branches: + branches: - main tags: - /.*/ @@ -31,7 +31,7 @@ workflows: name: GatherBatchEvidence primaryDescriptorPath: /wdl/GatherBatchEvidence.wdl filters: - branches: + branches: - main tags: - /.*/ @@ -40,7 +40,7 @@ workflows: name: ClusterBatch primaryDescriptorPath: /wdl/ClusterBatch.wdl filters: - branches: + branches: - main tags: - /.*/ @@ -49,7 +49,7 @@ workflows: name: GenerateBatchMetrics primaryDescriptorPath: /wdl/GenerateBatchMetrics.wdl filters: - branches: + branches: - main tags: - /.*/ @@ -58,7 +58,7 @@ workflows: name: FilterBatchSites primaryDescriptorPath: /wdl/FilterBatchSites.wdl filters: - branches: + branches: - main tags: - /.*/ @@ -67,7 +67,7 @@ workflows: name: PlotSVCountsPerSample primaryDescriptorPath: /wdl/PlotSVCountsPerSample.wdl filters: - branches: + branches: - main tags: - /.*/ @@ -76,7 +76,7 @@ workflows: name: FilterBatchSamples primaryDescriptorPath: /wdl/FilterBatchSamples.wdl filters: - branches: + branches: - main tags: - /.*/ @@ -85,7 +85,7 @@ workflows: name: MergeBatchSites primaryDescriptorPath: /wdl/MergeBatchSites.wdl filters: - branches: + branches: - main tags: - /.*/ @@ -94,7 +94,7 @@ workflows: name: GenotypeBatch primaryDescriptorPath: /wdl/GenotypeBatch.wdl filters: - branches: + branches: - main tags: - /.*/ @@ -103,7 +103,7 @@ workflows: name: RegenotypeCNVs primaryDescriptorPath: /wdl/RegenotypeCNVs.wdl filters: - branches: + branches: - main tags: - /.*/ @@ -112,7 +112,7 @@ workflows: name: CombineBatches primaryDescriptorPath: /wdl/CombineBatches.wdl filters: - branches: + branches: - main tags: - /.*/ @@ -121,7 +121,7 @@ workflows: name: ResolveComplexVariants primaryDescriptorPath: /wdl/ResolveComplexVariants.wdl filters: - branches: + branches: - main tags: - /.*/ @@ -130,7 +130,7 @@ workflows: name: GenotypeComplexVariants primaryDescriptorPath: /wdl/GenotypeComplexVariants.wdl filters: - branches: + branches: - main tags: - /.*/ @@ -139,7 +139,43 @@ workflows: name: CleanVcf primaryDescriptorPath: /wdl/CleanVcf.wdl filters: - branches: + branches: + - main + tags: + - /.*/ + + - subclass: WDL + name: RefineComplexVariants + primaryDescriptorPath: /wdl/RefineComplexVariants.wdl + filters: + branches: + - main + tags: + - /.*/ + + - subclass: WDL + name: JoinRawCalls + primaryDescriptorPath: /wdl/JoinRawCalls.wdl + filters: + branches: + - main + tags: + - /.*/ + + - subclass: WDL + name: SVConcordance + primaryDescriptorPath: /wdl/SVConcordance.wdl + filters: + branches: + - main + tags: + - /.*/ + + - subclass: WDL + name: FilterGenotypes + primaryDescriptorPath: /wdl/FilterGenotypes.wdl + filters: + branches: - main tags: - /.*/ @@ -148,7 +184,7 @@ workflows: name: MainVcfQc primaryDescriptorPath: /wdl/MainVcfQc.wdl filters: - branches: + branches: - main tags: - /.*/ @@ -157,7 +193,7 @@ workflows: name: AnnotateVcf primaryDescriptorPath: /wdl/AnnotateVcf.wdl filters: - branches: + branches: - main tags: - /.*/ diff --git a/README.md b/README.md index cb8808221..09146ef12 100644 
--- a/README.md +++ b/README.md @@ -27,4 +27,4 @@ continuous integration (CI) and continuous delivery (CD). GATK-SV CI/CD is developed as a set of Github Actions workflows that are available under the `.github/workflows` directory. Please refer to the [workflow's README](.github/workflows/README.md) -for their current coverage and setup. +for their current coverage and setup. \ No newline at end of file diff --git a/inputs/templates/terra_workspaces/cohort_mode/cohort_mode_workspace_dashboard.md.tmpl b/inputs/templates/terra_workspaces/cohort_mode/cohort_mode_workspace_dashboard.md.tmpl index 224f83af9..c22618380 100644 --- a/inputs/templates/terra_workspaces/cohort_mode/cohort_mode_workspace_dashboard.md.tmpl +++ b/inputs/templates/terra_workspaces/cohort_mode/cohort_mode_workspace_dashboard.md.tmpl @@ -61,10 +61,11 @@ The following workflows are included in this workspace, to be executed in this o 13. `13-ResolveComplexVariants`: Complex variant resolution 14. `14-GenotypeComplexVariants`: Complex variant re-genotyping 15. `15-CleanVcf`: VCF cleanup -16. `16-JoinRawCalls`: Combines unfiltered calls (from step 5) across batches -17. `17-SVConcordance`: Annotates variants with genotype concordance against raw calls -18. `18-FilterGenotypes`: Performs genotype filtering to improve precision and generates QC plots -19. `19-AnnotateVcf`: Cohort VCF annotations, including functional annotation, allele frequency (AF) annotation, and AF annotation with external population callsets +16. `16-RefineComplexVariants`: Complex variant and translocation refinement +17. `17-JoinRawCalls`: Combines unfiltered calls (from step 5) across batches +18. `18-SVConcordance`: Annotates variants with genotype concordance against raw calls +19. `19-FilterGenotypes`: Performs genotype filtering to improve precision and generates QC plots +20. `20-AnnotateVcf`: Cohort VCF annotations, including functional annotation, allele frequency (AF) annotation, and AF annotation with external population callsets Additional downstream modules, such as those for visualization, are under development. They are not included in this workspace at this time, but the source code can be found in the [GATK-SV GitHub repository](https://github.com/broadinstitute/gatk-sv). See **Downstream steps** towards the bottom of this page for more information. @@ -205,9 +206,9 @@ Read the full MergeBatchSites documentation [here](https://github.com/broadinsti Read the full GenotypeBatch documentation [here](https://github.com/broadinstitute/gatk-sv#genotype-batch). * Use the same `sample_set` definitions you used for `03-TrainGCNV` through `08-FilterBatchSamples`. 
-#### 11-RegenotypeCNVs, 12-CombineBatches, 13-ResolveComplexVariants, 14-GenotypeComplexVariants, 15-CleanVcf, 16-JoinRawCalls, 17-SVConcordance, 18-FilterGenotypes, and 19-AnnotateVcf +#### 11-RegenotypeCNVs, 12-CombineBatches, 13-ResolveComplexVariants, 14-GenotypeComplexVariants, 15-CleanVcf, 16-RefineComplexVariants, 17-JoinRawCalls, 18-SVConcordance, 19-FilterGenotypes, and 20-AnnotateVcf -Read the full documentation for [RegenotypeCNVs](https://github.com/broadinstitute/gatk-sv#regenotype-cnvs), [MakeCohortVcf](https://github.com/broadinstitute/gatk-sv#make-cohort-vcf) (which includes `CombineBatches`, `ResolveComplexVariants`, `GenotypeComplexVariants`, `CleanVcf`), [`JoinRawCalls`](https://github.com/broadinstitute/gatk-sv#join-raw-calls), [`SVConcordance`](https://github.com/broadinstitute/gatk-sv#svconcordance), [`FilterGenotypes`](https://github.com/broadinstitute/gatk-sv#filter-genotypes), and [AnnotateVcf](https://github.com/broadinstitute/gatk-sv#annotate-vcf) on the README. +Read the full documentation for [RegenotypeCNVs](https://github.com/broadinstitute/gatk-sv#regenotype-cnvs), [MakeCohortVcf](https://github.com/broadinstitute/gatk-sv#make-cohort-vcf) (which includes `CombineBatches`, `ResolveComplexVariants`, `GenotypeComplexVariants`, `CleanVcf`), [`RefineComplexVariants`](https://github.com/broadinstitute/gatk-sv#refine-complex), [`JoinRawCalls`](https://github.com/broadinstitute/gatk-sv#join-raw-calls), [`SVConcordance`](https://github.com/broadinstitute/gatk-sv#svconcordance), [`FilterGenotypes`](https://github.com/broadinstitute/gatk-sv#filter-genotypes), and [AnnotateVcf](https://github.com/broadinstitute/gatk-sv#annotate-vcf) on the README. * Use the same cohort `sample_set_set` you created and used for `09-MergeBatchSites`. 
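For orientation, the complex-variant refinement performed by `16-RefineComplexVariants` starts from the `CPX_INTERVALS`/`SOURCE` annotations on each record, splitting every segment such as `DUP_chr1:229987763-230011157` into its SV type and coordinates before building the PE-evidence queries (see `reformat_CPX_bed_and_generate_script.py` further down in this diff). A minimal, self-contained sketch of that segment parsing, mirroring the `parse_segment` helper added in this PR; the example segment strings are purely illustrative:

```python
# Minimal sketch of CPX_INTERVALS segment parsing, mirroring the parse_segment
# helper added in this PR; the example segments below are hypothetical.
def parse_segment(segment):
    """Split e.g. 'DUP_chr1:100-200' into (svtype, chrom, start, end)."""
    svtype, interval = segment.split('_')
    chrom, coords = interval.split(':')
    start, end = coords.split('-')
    return svtype, chrom, int(start), int(end)


if __name__ == "__main__":
    for seg in "DEL_chr2:1000-5000,INV_chr2:5000-9000".split(","):
        print(parse_segment(seg))  # -> ('DEL', 'chr2', 1000, 5000), ('INV', 'chr2', 5000, 9000)
```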
#### Downstream steps diff --git a/inputs/templates/terra_workspaces/cohort_mode/workflow_configurations/CleanVcf.json.tmpl b/inputs/templates/terra_workspaces/cohort_mode/workflow_configurations/CleanVcf.json.tmpl index e7c1bc7a0..93c172d8c 100644 --- a/inputs/templates/terra_workspaces/cohort_mode/workflow_configurations/CleanVcf.json.tmpl +++ b/inputs/templates/terra_workspaces/cohort_mode/workflow_configurations/CleanVcf.json.tmpl @@ -1,6 +1,8 @@ { "CleanVcf.contig_list": "${workspace.primary_contigs_fai}", "CleanVcf.allosome_fai": "${workspace.allosome_file}", + "CleanVcf.HERVK_reference": "${workspace.HERVK_reference}", + "CleanVcf.LINE1_reference": "${workspace.LINE1_reference}", "CleanVcf.chr_x": "${workspace.chr_x}", "CleanVcf.chr_y": "${workspace.chr_y}", diff --git a/inputs/templates/terra_workspaces/cohort_mode/workflow_configurations/MakeCohortVcf.json.tmpl b/inputs/templates/terra_workspaces/cohort_mode/workflow_configurations/MakeCohortVcf.json.tmpl index ccab739da..f3fbfb018 100644 --- a/inputs/templates/terra_workspaces/cohort_mode/workflow_configurations/MakeCohortVcf.json.tmpl +++ b/inputs/templates/terra_workspaces/cohort_mode/workflow_configurations/MakeCohortVcf.json.tmpl @@ -8,6 +8,8 @@ "MakeCohortVcf.depth_exclude_list": "${workspace.depth_exclude_list}", "MakeCohortVcf.empty_file" : "${workspace.empty_file}", "MakeCohortVcf.ref_dict": "${workspace.reference_dict}", + "MakeCohortVcf.HERVK_reference": "${workspace.HERVK_reference}", + "MakeCohortVcf.LINE1_reference": "${workspace.LINE1_reference}", "MakeCohortVcf.site_level_comparison_datasets": [ {{ reference_resources.ccdg_abel_site_level_benchmarking_dataset | tojson }}, diff --git a/inputs/templates/terra_workspaces/cohort_mode/workflow_configurations/RefineComplexVariants.json.tmpl b/inputs/templates/terra_workspaces/cohort_mode/workflow_configurations/RefineComplexVariants.json.tmpl new file mode 100644 index 000000000..fb95e1ce4 --- /dev/null +++ b/inputs/templates/terra_workspaces/cohort_mode/workflow_configurations/RefineComplexVariants.json.tmpl @@ -0,0 +1,17 @@ +{ + "RefineComplexVariants.vcf": "${this.cleaned_vcf}", + "RefineComplexVariants.prefix": "${this.sample_set_set_id}", + + "RefineComplexVariants.batch_name_list": "${this.sample_sets.sample_set_id}", + "RefineComplexVariants.batch_sample_lists": "${this.sample_sets.filtered_batch_samples_file}", + "RefineComplexVariants.PE_metrics": "${this.sample_sets.merged_PE}", + "RefineComplexVariants.PE_metrics_indexes": "${this.sample_sets.merged_PE_index}", + "RefineComplexVariants.Depth_DEL_beds": "${this.sample_sets.merged_dels}", + "RefineComplexVariants.Depth_DUP_beds": "${this.sample_sets.merged_dups}", + + "RefineComplexVariants.n_per_split": "15000", + + "RefineComplexVariants.sv_base_mini_docker": "${workspace.sv_base_mini_docker}", + "RefineComplexVariants.sv_pipeline_docker": "${workspace.sv_pipeline_docker}", + "RefineComplexVariants.linux_docker": "${workspace.linux_docker}" +} diff --git a/inputs/templates/terra_workspaces/cohort_mode/workflow_configurations/SVConcordance.json.tmpl b/inputs/templates/terra_workspaces/cohort_mode/workflow_configurations/SVConcordance.json.tmpl index ff48489ac..1c9dfed26 100644 --- a/inputs/templates/terra_workspaces/cohort_mode/workflow_configurations/SVConcordance.json.tmpl +++ b/inputs/templates/terra_workspaces/cohort_mode/workflow_configurations/SVConcordance.json.tmpl @@ -2,7 +2,7 @@ "SVConcordance.gatk_docker": "${workspace.gatk_docker}", "SVConcordance.sv_base_mini_docker": 
"${workspace.sv_base_mini_docker}", - "SVConcordance.eval_vcf" : "${this.cleaned_vcf}", + "SVConcordance.eval_vcf" : "${this.cpx_refined_vcf}", "SVConcordance.truth_vcf" : "${this.joined_raw_calls_vcf}", "SVConcordance.output_prefix": "${this.sample_set_set_id}", diff --git a/inputs/templates/terra_workspaces/cohort_mode/workspace.tsv.tmpl b/inputs/templates/terra_workspaces/cohort_mode/workspace.tsv.tmpl index b9d5f26d5..6ee7a177d 100644 --- a/inputs/templates/terra_workspaces/cohort_mode/workspace.tsv.tmpl +++ b/inputs/templates/terra_workspaces/cohort_mode/workspace.tsv.tmpl @@ -29,6 +29,8 @@ exclude_intervals_for_gcnv_filter_intervals {{ reference_resources.exclude_inter external_af_ref_bed {{ reference_resources.external_af_ref_bed }} external_af_ref_bed_prefix {{ reference_resources.external_af_ref_bed_prefix }} genome_file {{ reference_resources.genome_file }} +HERVK_reference {{ reference_resources.hervk_reference }} +LINE1_reference {{ reference_resources.line1_reference }} manta_region_bed {{ reference_resources.manta_region_bed }} manta_region_bed_index {{ reference_resources.manta_region_bed_index }} mei_bed {{ reference_resources.mei_bed }} diff --git a/inputs/templates/terra_workspaces/single_sample/GATKSVPipelineSingleSample.json.tmpl b/inputs/templates/terra_workspaces/single_sample/GATKSVPipelineSingleSample.json.tmpl index e5a920a0c..88c735b2d 100644 --- a/inputs/templates/terra_workspaces/single_sample/GATKSVPipelineSingleSample.json.tmpl +++ b/inputs/templates/terra_workspaces/single_sample/GATKSVPipelineSingleSample.json.tmpl @@ -97,6 +97,9 @@ "GATKSVPipelineSingleSample.clean_vcf_random_seed": 0, "GATKSVPipelineSingleSample.run_vcf_qc" : false, + "GATKSVPipelineSingleSample.MakeCohortVcf.HERVK_reference": "${workspace.reference_HERVK}", + "GATKSVPipelineSingleSample.MakeCohortVcf.LINE1_reference": "${workspace.reference_LINE1}", + "GATKSVPipelineSingleSample.protein_coding_gtf" : "${workspace.reference_protein_coding_gtf}", "GATKSVPipelineSingleSample.noncoding_bed" : "${workspace.reference_noncoding_bed}", "GATKSVPipelineSingleSample.external_af_ref_bed" : "${workspace.reference_external_af_ref_bed}", diff --git a/inputs/templates/terra_workspaces/single_sample/workspace.tsv.tmpl b/inputs/templates/terra_workspaces/single_sample/workspace.tsv.tmpl index 25e13568f..2bcef0ad2 100644 --- a/inputs/templates/terra_workspaces/single_sample/workspace.tsv.tmpl +++ b/inputs/templates/terra_workspaces/single_sample/workspace.tsv.tmpl @@ -55,6 +55,8 @@ reference_exclude_intervals_for_gcnv_filter_intervals {{ reference_resources.exc reference_external_af_ref_bed {{ reference_resources.external_af_ref_bed }} reference_external_af_ref_bed_prefix {{ reference_resources.external_af_ref_bed_prefix }} reference_genome_file {{ reference_resources.genome_file }} +reference_HERVK {{ reference_resources.hervk_reference }} +reference_LINE1 {{ reference_resources.line1_reference }} reference_manta_region_bed {{ reference_resources.manta_region_bed }} reference_manta_region_bed_index {{ reference_resources.manta_region_bed_index }} reference_mei_bed {{ reference_resources.mei_bed }} diff --git a/inputs/templates/test/FilterGenotypes/FilterGenotypes.fixed_cutoffs.json.tmpl b/inputs/templates/test/FilterGenotypes/FilterGenotypes.fixed_cutoffs.json.tmpl index 5035f0d0b..1025504cf 100644 --- a/inputs/templates/test/FilterGenotypes/FilterGenotypes.fixed_cutoffs.json.tmpl +++ b/inputs/templates/test/FilterGenotypes/FilterGenotypes.fixed_cutoffs.json.tmpl @@ -5,7 +5,13 @@ 
"FilterGenotypes.gq_recalibrator_model_file": {{ reference_resources.aou_recalibrate_gq_model_file | tojson }}, "FilterGenotypes.sl_filter_args": "--small-del-threshold 93 --medium-del-threshold 150 --small-dup-threshold -51 --medium-dup-threshold -4 --ins-threshold -13 --inv-threshold -19", - "FilterGenotypes.genome_tracks": {{ reference_resources.recalibrate_gq_genome_tracks | tojson }}, + "FilterGenotypes.genome_tracks": [ + {{ reference_resources.recalibrate_gq_genome_track_repeatmasker | tojson }}, + {{ reference_resources.recalibrate_gq_genome_track_segdup | tojson }}, + {{ reference_resources.recalibrate_gq_genome_track_simple_repeats | tojson }}, + {{ reference_resources.recalibrate_gq_genome_track_umap100 | tojson }}, + {{ reference_resources.recalibrate_gq_genome_track_umap24 | tojson }} + ], "FilterGenotypes.recalibrate_gq_args": [ "--keep-homvar false", "--keep-homref true", diff --git a/inputs/templates/test/FilterGenotypes/FilterGenotypes.optimize_cutoffs.json.tmpl b/inputs/templates/test/FilterGenotypes/FilterGenotypes.optimize_cutoffs.json.tmpl index b11a12812..53e50c1ba 100644 --- a/inputs/templates/test/FilterGenotypes/FilterGenotypes.optimize_cutoffs.json.tmpl +++ b/inputs/templates/test/FilterGenotypes/FilterGenotypes.optimize_cutoffs.json.tmpl @@ -6,7 +6,13 @@ "FilterGenotypes.primary_contigs_fai": {{ reference_resources.primary_contigs_fai | tojson }}, "FilterGenotypes.gq_recalibrator_model_file": {{ reference_resources.aou_recalibrate_gq_model_file | tojson }}, - "FilterGenotypes.genome_tracks": {{ reference_resources.recalibrate_gq_genome_tracks | tojson }}, + "FilterGenotypes.genome_tracks": [ + {{ reference_resources.recalibrate_gq_genome_track_repeatmasker | tojson }}, + {{ reference_resources.recalibrate_gq_genome_track_segdup | tojson }}, + {{ reference_resources.recalibrate_gq_genome_track_simple_repeats | tojson }}, + {{ reference_resources.recalibrate_gq_genome_track_umap100 | tojson }}, + {{ reference_resources.recalibrate_gq_genome_track_umap24 | tojson }} + ], "FilterGenotypes.recalibrate_gq_args": [ "--keep-homvar false", "--keep-homref true", diff --git a/inputs/templates/test/GATKSVPipelineBatch/GATKSVPipelineBatch.FromSampleEvidence.json.tmpl b/inputs/templates/test/GATKSVPipelineBatch/GATKSVPipelineBatch.FromSampleEvidence.json.tmpl index 2f5ba41c4..2bfbc4f1b 100644 --- a/inputs/templates/test/GATKSVPipelineBatch/GATKSVPipelineBatch.FromSampleEvidence.json.tmpl +++ b/inputs/templates/test/GATKSVPipelineBatch/GATKSVPipelineBatch.FromSampleEvidence.json.tmpl @@ -134,6 +134,8 @@ "GATKSVPipelineBatch.MakeCohortVcf.mei_bed": {{ reference_resources.mei_bed | tojson }}, "GATKSVPipelineBatch.MakeCohortVcf.pe_exclude_list": {{ reference_resources.pesr_exclude_list | tojson }}, "GATKSVPipelineBatch.MakeCohortVcf.depth_exclude_list": {{ reference_resources.depth_exclude_list | tojson }}, + "GATKSVPipelineBatch.MakeCohortVcf.HERVK_reference": {{ reference_resources.hervk_reference | tojson }}, + "GATKSVPipelineBatch.MakeCohortVcf.LINE1_reference": {{ reference_resources.line1_reference | tojson }}, "GATKSVPipelineBatch.MakeCohortVcf.min_sr_background_fail_batches": 0.5, "GATKSVPipelineBatch.MakeCohortVcf.max_shards_per_chrom_clean_vcf_step1": 200, "GATKSVPipelineBatch.MakeCohortVcf.min_records_per_shard_clean_vcf_step1": 5000, diff --git a/inputs/templates/test/GATKSVPipelineBatch/GATKSVPipelineBatch.json.tmpl b/inputs/templates/test/GATKSVPipelineBatch/GATKSVPipelineBatch.json.tmpl index 5412fd79e..9ba04d229 100644 --- 
a/inputs/templates/test/GATKSVPipelineBatch/GATKSVPipelineBatch.json.tmpl +++ b/inputs/templates/test/GATKSVPipelineBatch/GATKSVPipelineBatch.json.tmpl @@ -127,6 +127,8 @@ "GATKSVPipelineBatch.MakeCohortVcf.mei_bed": {{ reference_resources.mei_bed | tojson }}, "GATKSVPipelineBatch.MakeCohortVcf.pe_exclude_list": {{ reference_resources.pesr_exclude_list | tojson }}, "GATKSVPipelineBatch.MakeCohortVcf.depth_exclude_list": {{ reference_resources.depth_exclude_list | tojson }}, + "GATKSVPipelineBatch.MakeCohortVcf.HERVK_reference": {{ reference_resources.hervk_reference | tojson }}, + "GATKSVPipelineBatch.MakeCohortVcf.LINE1_reference": {{ reference_resources.line1_reference | tojson }}, "GATKSVPipelineBatch.MakeCohortVcf.min_sr_background_fail_batches": 0.5, "GATKSVPipelineBatch.MakeCohortVcf.max_shards_per_chrom_clean_vcf_step1": 200, "GATKSVPipelineBatch.MakeCohortVcf.min_records_per_shard_clean_vcf_step1": 5000, diff --git a/inputs/templates/test/GATKSVPipelineSingleSample/GATKSVPipelineSingleSample.json.tmpl b/inputs/templates/test/GATKSVPipelineSingleSample/GATKSVPipelineSingleSample.json.tmpl index f08b38902..870b5b82f 100644 --- a/inputs/templates/test/GATKSVPipelineSingleSample/GATKSVPipelineSingleSample.json.tmpl +++ b/inputs/templates/test/GATKSVPipelineSingleSample/GATKSVPipelineSingleSample.json.tmpl @@ -101,6 +101,9 @@ "GATKSVPipelineSingleSample.clean_vcf5_records_per_shard": 5000, "GATKSVPipelineSingleSample.run_vcf_qc" : false, + "GATKSVPipelineSingleSample.MakeCohortVcf.HERVK_reference": {{ reference_resources.hervk_reference | tojson }}, + "GATKSVPipelineSingleSample.MakeCohortVcf.LINE1_reference": {{ reference_resources.line1_reference | tojson }}, + "GATKSVPipelineSingleSample.protein_coding_gtf" : {{ reference_resources.protein_coding_gtf | tojson }}, "GATKSVPipelineSingleSample.noncoding_bed" : {{ reference_resources.noncoding_bed | tojson }}, "GATKSVPipelineSingleSample.external_af_ref_bed" : {{ reference_resources.external_af_ref_bed | tojson }}, diff --git a/inputs/templates/test/GATKSVPipelineSingleSample/GATKSVPipelineSingleSample.melt.json.tmpl b/inputs/templates/test/GATKSVPipelineSingleSample/GATKSVPipelineSingleSample.melt.json.tmpl index 4d35ca4ae..8352403df 100644 --- a/inputs/templates/test/GATKSVPipelineSingleSample/GATKSVPipelineSingleSample.melt.json.tmpl +++ b/inputs/templates/test/GATKSVPipelineSingleSample/GATKSVPipelineSingleSample.melt.json.tmpl @@ -90,6 +90,10 @@ "GATKSVPipelineSingleSample.depth_clustering_algorithm": "SINGLE_LINKAGE", "GATKSVPipelineSingleSample.ref_copy_number_autosomal_contigs" : 2, + + "GATKSVPipelineSingleSample.MakeCohortVcf.HERVK_reference": {{ reference_resources.hervk_reference | tojson }}, + "GATKSVPipelineSingleSample.MakeCohortVcf.LINE1_reference": {{ reference_resources.line1_reference | tojson }}, + "GATKSVPipelineSingleSample.clean_vcf_min_sr_background_fail_batches": 0.5, "GATKSVPipelineSingleSample.max_shard_size_resolve" : 500, "GATKSVPipelineSingleSample.clean_vcf_max_shards_per_chrom_clean_vcf_step1": 200, diff --git a/inputs/templates/test/MakeCohortVcf/CleanVcf.json.tmpl b/inputs/templates/test/MakeCohortVcf/CleanVcf.json.tmpl index 8606a4a50..196505724 100644 --- a/inputs/templates/test/MakeCohortVcf/CleanVcf.json.tmpl +++ b/inputs/templates/test/MakeCohortVcf/CleanVcf.json.tmpl @@ -1,6 +1,8 @@ { "CleanVcf.contig_list": {{ reference_resources.primary_contigs_fai | tojson }}, "CleanVcf.allosome_fai": {{ reference_resources.allosome_file | tojson }}, + "CleanVcf.HERVK_reference": {{ 
reference_resources.hervk_reference | tojson }}, + "CleanVcf.LINE1_reference": {{ reference_resources.line1_reference | tojson }}, "CleanVcf.chr_x": {{ reference_resources.chr_x | tojson }}, "CleanVcf.chr_y": {{ reference_resources.chr_y | tojson }}, diff --git a/inputs/templates/test/MakeCohortVcf/MakeCohortVcf.json.tmpl b/inputs/templates/test/MakeCohortVcf/MakeCohortVcf.json.tmpl index 8c27a74cf..cad2a9e03 100644 --- a/inputs/templates/test/MakeCohortVcf/MakeCohortVcf.json.tmpl +++ b/inputs/templates/test/MakeCohortVcf/MakeCohortVcf.json.tmpl @@ -8,6 +8,8 @@ "MakeCohortVcf.depth_exclude_list": {{ reference_resources.depth_exclude_list | tojson }}, "MakeCohortVcf.empty_file" : {{ reference_resources.empty_file | tojson }}, "MakeCohortVcf.ref_dict": {{ reference_resources.reference_dict | tojson }}, + "MakeCohortVcf.HERVK_reference": {{ reference_resources.hervk_reference | tojson }}, + "MakeCohortVcf.LINE1_reference": {{ reference_resources.line1_reference | tojson }}, "MakeCohortVcf.site_level_comparison_datasets": [ {{ reference_resources.ccdg_abel_site_level_benchmarking_dataset | tojson }}, diff --git a/inputs/templates/test/RefineComplexVariants/RefineComplexVariants.json.tmpl b/inputs/templates/test/RefineComplexVariants/RefineComplexVariants.json.tmpl new file mode 100644 index 000000000..c9feb45a2 --- /dev/null +++ b/inputs/templates/test/RefineComplexVariants/RefineComplexVariants.json.tmpl @@ -0,0 +1,17 @@ +{ + "RefineComplexVariants.vcf": {{ test_batch.clean_vcf | tojson }}, + "RefineComplexVariants.prefix": {{ test_batch.name | tojson }}, + + "RefineComplexVariants.batch_name_list": [{{ test_batch.name | tojson }}], + "RefineComplexVariants.batch_sample_lists": [ {{ test_batch.final_sample_list | tojson }} ], + "RefineComplexVariants.PE_metrics": [ {{ test_batch.merged_disc_file | tojson }} ], + "RefineComplexVariants.PE_metrics_indexes": [ {{ test_batch.merged_disc_file_index | tojson }} ], + "RefineComplexVariants.Depth_DEL_beds": [ {{ test_batch.del_bed | tojson }} ], + "RefineComplexVariants.Depth_DUP_beds": [ {{ test_batch.dup_bed | tojson }} ], + + "RefineComplexVariants.n_per_split": "15000", + + "RefineComplexVariants.sv_base_mini_docker": {{ dockers.sv_base_mini_docker | tojson }}, + "RefineComplexVariants.sv_pipeline_docker": {{ dockers.sv_pipeline_docker | tojson }}, + "RefineComplexVariants.linux_docker": {{ dockers.linux_docker | tojson }} +} diff --git a/inputs/templates/test/SVConcordance/SVConcordance.json.tmpl b/inputs/templates/test/SVConcordance/SVConcordance.json.tmpl index 0540305cd..de29c485f 100644 --- a/inputs/templates/test/SVConcordance/SVConcordance.json.tmpl +++ b/inputs/templates/test/SVConcordance/SVConcordance.json.tmpl @@ -2,7 +2,7 @@ "SVConcordance.gatk_docker": {{ dockers.gatk_docker | tojson }}, "SVConcordance.sv_base_mini_docker": {{ dockers.sv_base_mini_docker | tojson }}, - "SVConcordance.eval_vcf" : {{ test_batch.gatk_formatted_vcf | tojson }}, + "SVConcordance.eval_vcf" : {{ test_batch.complex_refined_vcf | tojson }}, "SVConcordance.truth_vcf" : {{ test_batch.joined_raw_calls_vcf | tojson }}, "SVConcordance.output_prefix": {{ test_batch.name | tojson }}, diff --git a/inputs/values/ref_panel_1kg.json b/inputs/values/ref_panel_1kg.json index 4ee29e419..f317967a5 100644 --- a/inputs/values/ref_panel_1kg.json +++ b/inputs/values/ref_panel_1kg.json @@ -1166,8 +1166,8 @@ 
"gs://gatk-sv-ref-panel-1kg/outputs/mw-make-cohort-vcf-templates/ResolveComplexVariants/breakpoint-overlap-dropped-record-vcfs/ref_panel_1kg.chrX.breakpoint_overlap.dropped_records.vcf.gz.tbi", "gs://gatk-sv-ref-panel-1kg/outputs/mw-make-cohort-vcf-templates/ResolveComplexVariants/breakpoint-overlap-dropped-record-vcfs/ref_panel_1kg.chrY.breakpoint_overlap.dropped_records.vcf.gz.tbi" ], - "clean_vcf": "gs://gatk-sv-ref-panel-1kg/outputs/mw-vcf-reshard/ref_panel_1kg.cleaned.vcf.gz", - "clean_vcf_index": "gs://gatk-sv-ref-panel-1kg/outputs/mw-vcf-reshard/ref_panel_1kg.cleaned.vcf.gz.tbi", + "clean_vcf": "gs://gatk-sv-ref-panel-1kg/outputs/eph-refine-cpx/ref_panel_1kg.cleaned.vcf.gz", + "clean_vcf_index": "gs://gatk-sv-ref-panel-1kg/outputs/eph-refine-cpx/ref_panel_1kg.cleaned.vcf.gz.tbi", "clean_vcf_gatk_formatter_args": "", "cluster_background_fail_lists": [ "gs://gatk-sv-ref-panel-1kg/outputs/mw-make-cohort-vcf-templates/CombineBatches/cluster-background-fail-lists/ref_panel_1kg.chr1.sr_background_fail.updated2.txt", @@ -1329,6 +1329,8 @@ "gs://gatk-sv-ref-panel-1kg/outputs/mw-vcf-reshard/ref_panel_1kg.chrX.regenotyped.vcf.gz.tbi", "gs://gatk-sv-ref-panel-1kg/outputs/mw-vcf-reshard/ref_panel_1kg.chrY.regenotyped.vcf.gz.tbi" ], + "complex_refined_vcf": "gs://gatk-sv-ref-panel-1kg/outputs/eph-refine-cpx/ref_panel_1kg.cpx_refined.vcf.gz", + "complex_refined_vcf_index": "gs://gatk-sv-ref-panel-1kg/outputs/eph-refine-cpx/ref_panel_1kg.cpx_refined.vcf.gz", "complex_resolve_bothside_pass_list": "gs://gatk-sv-ref-panel-1kg/outputs/mw-vcf-reshard/ref_panel_1kg.all.sr_bothside_pass.updated3.txt", "complex_resolve_background_fail_list": "gs://gatk-sv-ref-panel-1kg/outputs/mw-vcf-reshard/ref_panel_1kg.all.sr_background_fail.updated3.txt", "complex_resolve_vcfs": [ @@ -1383,8 +1385,8 @@ "gs://gatk-sv-ref-panel-1kg/outputs/mw-vcf-reshard/ref_panel_1kg.reshard_vcf.chrX.resharded.vcf.gz.tbi", "gs://gatk-sv-ref-panel-1kg/outputs/mw-vcf-reshard/ref_panel_1kg.reshard_vcf.chrY.resharded.vcf.gz.tbi" ], - "concordance_vcf": "gs://gatk-sv-ref-panel-1kg/outputs/mw_sv_concordance_update/ref_panel_1kg.concordance.vcf.gz", - "concordance_vcf_index": "gs://gatk-sv-ref-panel-1kg/outputs/mw_sv_concordance_update/ref_panel_1kg.concordance.vcf.gz.tbi", + "concordance_vcf": "gs://gatk-sv-ref-panel-1kg/outputs/eph-refine-cpx/ref_panel_1kg.concordance.vcf.gz", + "concordance_vcf_index": "gs://gatk-sv-ref-panel-1kg/outputs/eph-refine-cpx/ref_panel_1kg.concordance.vcf.gz.tbi", "contig_ploidy_model_tar": "gs://gatk-sv-resources-public/hg38/v0/sv-resources/ref-panel/1KG/v2/gcnv/ref_panel_1kg_v2-contig-ploidy-model.tar.gz", "counts": [ "gs://gatk-sv-ref-panel-1kg/outputs/tws-no-cram-conversion/GatherSampleEvidenceBatch/HG00096.counts.tsv.gz", @@ -1852,9 +1854,9 @@ "genotype_pesr_pesr_sepcutoff": "gs://gatk-sv-ref-panel-1kg/outputs/GATKSVPipelineBatch/38c65ca4-2a07-4805-86b6-214696075fef/call-GenotypeBatch/GenotypeBatch/ad17f522-0950-4f0a-9148-a13f689082ed/call-GenotypePESRPart1/GenotypePESRPart1/40ec6d76-dd1c-432d-bfab-bc4426d0b1ec/call-TrainRDGenotyping/TrainRDGenotyping/e5540a96-9072-4719-bcfb-afccdfec15c6/call-UpdateCutoff/cacheCopy/ref_panel_1kg.pesr.pesr_sepcutoff.txt", "genotyped_depth_vcf": "gs://gatk-sv-ref-panel-1kg/outputs/GATKSVPipelineBatch/38c65ca4-2a07-4805-86b6-214696075fef/call-GenotypeBatch/GenotypeBatch/ad17f522-0950-4f0a-9148-a13f689082ed/call-GenotypeDepthPart2/GenotypeDepthPart2/0aafd752-e606-4196-86ac-41c1c3ce1eb2/call-ConcatGenotypedVcfs/cacheCopy/ref_panel_1kg.depth.vcf.gz", "genotyped_pesr_vcf": 
"gs://gatk-sv-ref-panel-1kg/outputs/GATKSVPipelineBatch/38c65ca4-2a07-4805-86b6-214696075fef/call-GenotypeBatch/GenotypeBatch/ad17f522-0950-4f0a-9148-a13f689082ed/call-GenotypePESRPart2/GenotypePESRPart2/ce1f4075-1a3e-44b5-9cfe-bfb701327616/call-ConcatGenotypedVcfs/cacheCopy/ref_panel_1kg.pesr.vcf.gz", - "genotype_filtered_vcf": "gs://gatk-sv-ref-panel-1kg/outputs/mw-filter-genotypes/1kgp/ref_panel_1kg.filter_genotypes.vcf.gz", - "genotype_filtered_vcf_index": "gs://gatk-sv-ref-panel-1kg/outputs/mw-filter-genotypes/1kgp/ref_panel_1kg.filter_genotypes.vcf.gz.tbi", - "genotype_filtered_qc_tarball": "gs://gatk-sv-ref-panel-1kg/outputs/mw-filter-genotypes/1kgp/ref_panel_1kg.filter_genotypes_SV_VCF_QC_output.tar.gz", + "genotype_filtered_vcf": "gs://gatk-sv-ref-panel-1kg/outputs/eph-refine-cpx/ref_panel_1kg.filter_genotypes.sanitized.vcf.gz", + "genotype_filtered_vcf_index": "gs://gatk-sv-ref-panel-1kg/outputs/eph-refine-cpx/ref_panel_1kg.filter_genotypes.sanitized.vcf.gz.tbi", + "genotype_filtered_qc_tarball": "gs://gatk-sv-ref-panel-1kg/outputs/eph-refine-cpx/ref_panel_1kg.filter_genotypes_SV_VCF_QC_output.tar.gz", "joined_raw_calls_vcf": "gs://gatk-sv-ref-panel-1kg/outputs/mw_sv_concordance_update/ref_panel_1kg.join_raw_calls.vcf.gz", "joined_raw_calls_vcf_index": "gs://gatk-sv-ref-panel-1kg/outputs/mw_sv_concordance_update/ref_panel_1kg.join_raw_calls.vcf.gz.tbi", "manta_vcfs": [ diff --git a/inputs/values/resources_hg38.json b/inputs/values/resources_hg38.json index 73484d5b3..545d4b2b0 100644 --- a/inputs/values/resources_hg38.json +++ b/inputs/values/resources_hg38.json @@ -21,6 +21,8 @@ "external_af_ref_bed" : "gs://gcp-public-data--broad-references/hg38/v0/sv-resources/resources/v1/gnomad_AF/gnomad_v2.1_sv.sites.GRCh38.bed.gz", "external_af_ref_bed_prefix" : "gnomad_v2.1_sv", "genome_file" : "gs://gcp-public-data--broad-references/hg38/v0/sv-resources/resources/v1/hg38.genome", + "hervk_reference": "gs://gatk-sv-resources-public/hg38/v0/sv-resources/resources/v1/HERVK.sorted.bed.gz", + "line1_reference": "gs://gatk-sv-resources-public/hg38/v0/sv-resources/resources/v1/LINE1.sorted.bed.gz", "manta_region_bed" : "gs://gcp-public-data--broad-references/hg38/v0/sv-resources/resources/v1/primary_contigs_plus_mito.bed.gz", "manta_region_bed_index" : "gs://gcp-public-data--broad-references/hg38/v0/sv-resources/resources/v1/primary_contigs_plus_mito.bed.gz.tbi", "mei_bed" : "gs://gatk-sv-resources-public/hg38/v0/sv-resources/resources/v1/hg38.repeatmasker.mei.with_SVA.pad_50_merged.bed.gz", diff --git a/scripts/test/terra_validation.py b/scripts/test/terra_validation.py index 2e9a64756..aa9079d19 100644 --- a/scripts/test/terra_validation.py +++ b/scripts/test/terra_validation.py @@ -113,7 +113,7 @@ def main(): parser.add_argument("-j", "--womtool-jar", help="Path to womtool jar", required=True) parser.add_argument("-n", "--num-input-jsons", help="Number of Terra input JSONs expected", - required=False, default=24, type=int) + required=False, default=25, type=int) parser.add_argument("--log-level", help="Specify level of logging information, ie. 
info, warning, error (not case-sensitive)", required=False, default="INFO") diff --git a/src/sv-pipeline/scripts/apply_sl_filter.py b/src/sv-pipeline/scripts/apply_sl_filter.py index ce0151e92..917b605fb 100644 --- a/src/sv-pipeline/scripts/apply_sl_filter.py +++ b/src/sv-pipeline/scripts/apply_sl_filter.py @@ -4,6 +4,7 @@ import sys import pysam import math +from numpy import median from typing import List, Text, Dict, Optional _gt_no_call_map = dict() @@ -78,6 +79,24 @@ def _fails_filter(sl, gt_is_ref, cutoff): return sl < cutoff +def recal_qual_score(record): + """ + Recalibrate quality score for a single variant + """ + quals = [] + for s in [s for s in record.samples]: + gt = record.samples[s]['GT'] + if _is_hom_ref(gt) or _is_no_call(gt): + continue + elif _is_hom_var(gt): + quals.append(99) + else: + quals.append(record.samples[s]['GQ']) + + if len(quals) > 0: + return int(median(quals)) + + def _apply_filter(record, sl_threshold, ploidy_dict, apply_hom_ref, ncr_threshold, keep_gq, gq_scale_factor, upper_sl_cap, lower_sl_cap, sl_shift, max_gq): record.info['MINSL'] = sl_threshold @@ -109,15 +128,19 @@ def _apply_filter(record, sl_threshold, ploidy_dict, apply_hom_ref, ncr_threshol gt['GT_FILTER'] = _gt_to_filter_status(gt['GT'], fails_filter) if fails_filter: gt['GT'] = _filter_gt(gt['GT'], allele) + if record.info['SVTYPE'] != 'CNV' and _is_no_call(gt['GT']): n_no_call += 1 # Annotate metrics record.info['NCN'] = n_no_call record.info['NCR'] = n_no_call / n_samples if n_samples > 0 else None if ncr_threshold is not None and record.info['NCR'] is not None and record.info['NCR'] >= ncr_threshold: record.filter.add(_HIGH_NCR_FILTER) + if len(record.filter) == 0: + record.filter.add('PASS') n_sl = len(sl_list) record.info['SL_MEAN'] = sum(sl_list) / n_sl if n_sl > 0 else None record.info['SL_MAX'] = max(sl_list) if n_sl > 0 else None + record.qual = recal_qual_score(record) # Clean out AF metrics since they're no longer valid for field in ['AC', 'AF']: if field in record.info: diff --git a/src/sv-pipeline/scripts/reformat_CPX_bed_and_generate_script.py b/src/sv-pipeline/scripts/reformat_CPX_bed_and_generate_script.py new file mode 100644 index 000000000..8b4b8cef8 --- /dev/null +++ b/src/sv-pipeline/scripts/reformat_CPX_bed_and_generate_script.py @@ -0,0 +1,499 @@ +#!python +# this script takes in the CPX SVs, reformats in svelter format, and generates the script to collect PE metrics +import os +import argparse + + +def header_pos_readin(input_bed): + fin = os.popen(r'''zcat %s''' % input_bed) + pin = fin.readline().strip().split() + out = {} + for i in range(len(pin)): + out[pin[i]] = i + fin.close() + return out + + +def sample_pe_readin(sample_pe_file): + # sample_pe_file is a 2 column file with sample ID and PE metrics on both columns + out = {} + fin = open(sample_pe_file) + for line in fin: + pin = line.strip().split() + if not pin[0] in out.keys(): + out[pin[0]] = pin[1] + fin.close() + return out + + +def svid_sample_readin(input_bed, header_pos): + out = {} + fin = os.popen(r'''zcat %s''' % input_bed) + for line in fin: + pin = line.strip().split('\t') + if not pin[0][0] == '#': + out[pin[header_pos['name']]] = pin[header_pos['samples']] + fin.close() + return out + + +def parse_segment(segment): + svtype, interval = segment.split('_') + chrom, coords = interval.split(':') + coord1, coord2 = coords.split('-') + return svtype, chrom, int(coord1), int(coord2) + + +# segments: CPX_INTERVALS split by ',' +def extract_bp_list_for_inv_cnv_events(segments, cpx_type): + s0_svtype, 
s0_chrom, s0_c1, s0_c2 = parse_segment(segments[0]) + breakpoints = [s0_chrom] + [s0_c1, s0_c2] + if 'INVdup' not in cpx_type: + for seg in segments[1:]: + seg_svtype, seg_chrom, seg_c1, seg_c2 = parse_segment(seg) + breakpoints.append(seg_c2) + else: + if cpx_type == 'INVdup': + s1_svtype, s1_chrom, s1_c1, s1_c2 = parse_segment(segments[1]) + if s1_c1 < breakpoints[2]: + breakpoints = breakpoints[:2] + [s1_c1, breakpoints[2]] + elif cpx_type == 'dupINVdup' or cpx_type == 'delINVdup': + s2_svtype, s2_chrom, s2_c1, s2_c2 = parse_segment(segments[2]) + breakpoints += [s2_c1, s2_c2] + return breakpoints + + +def extract_bp_list_for_ddup_events(coordinates, segments, small_sv_size_threshold): + # example of coordinates: ["chr1",1154129,1154133] + # example of segments: DUP_chr1:229987763-230011157 + del_bp = [coordinates[0], int(coordinates[1]), int(coordinates[2])] + # CW: this looks like it only gets the coordinates from the first DUP segment? + # dup_seg = [i for i in segments if 'DUP_' in i][0].split('_')[1].split(':') + # dup_bp = [int(i) for i in dup_seg[1].split('-')] + # inv_flag = len([i for i in segments if "INV_" in i]) + # rewrite as: + inv_flag = 0 + dup_bp = [] + for segment in segments: + if 'DUP_' in segment: + seg_svtype, seg_chrom, seg_c1, seg_c2 = parse_segment(segment) + dup_bp = [seg_c1, seg_c2] + if 'INV_' in segment: + inv_flag = inv_flag + 1 + if 'DEL_' in segment: + seg_svtype, seg_chrom, seg_c1, seg_c2 = parse_segment(segment) + del_bp = [seg_chrom, seg_c1, seg_c2] + # inv_flag == 0 : no INV involved in the insertion + # inv_flag > 0: INV involved + structures = [] + breakpoints = [] + if inv_flag == 0: + if del_bp[2] < dup_bp[0]: + breakpoints = del_bp + dup_bp + if del_bp[2] - del_bp[1] > small_sv_size_threshold: + structures = ['abc', 'cbc'] + else: + structures = ['ab', 'bab'] + elif del_bp[1] > dup_bp[1]: + breakpoints = [del_bp[0]] + dup_bp + del_bp[1:] + if del_bp[2] - del_bp[1] > small_sv_size_threshold: + structures = ['abc', 'aba'] + else: + structures = ['ab', 'aba'] + elif del_bp[1] < dup_bp[0] and not del_bp[2] < dup_bp[0]: + breakpoints = del_bp[:2] + dup_bp + structures = ['ab', 'bb'] + elif del_bp[1] < dup_bp[1] and not del_bp[2] < dup_bp[1]: + breakpoints = [del_bp[0]] + dup_bp + [del_bp[2]] + structures = ['ab', 'aa'] + else: + return 'unresolved' + elif inv_flag > 0: + if del_bp[2] < dup_bp[0]: + breakpoints = del_bp + dup_bp + if del_bp[2] - del_bp[1] > small_sv_size_threshold: + structures = ['abc', 'c^bc'] + else: + structures = ['ab', 'b^ab'] + elif del_bp[1] > dup_bp[1]: + breakpoints = [del_bp[0]] + dup_bp + del_bp[1:] + if del_bp[2] - del_bp[1] > small_sv_size_threshold: + structures = ['abc', 'aba^'] + else: + structures = ['ab', 'aba^'] + elif del_bp[1] < dup_bp[0] and not del_bp[2] < dup_bp[0]: + breakpoints = del_bp[:2] + dup_bp + structures = ['ab', 'b^b'] + elif del_bp[1] < dup_bp[1] and not del_bp[2] < dup_bp[1]: + breakpoints = [del_bp[0]] + dup_bp + [del_bp[2]] + structures = ['ab', 'aa^'] + else: + return 'unresolved' + return [breakpoints, structures] + + +def extract_bp_list_for_ins_idel(coordinates, segments, small_sv_size_threshold): + # example of coordinates: ["chr1",1154129,1154133] + # example of segments: DUP_chr1:229987763-230011157 + del_bp = [coordinates[0], int(coordinates[1]), int(coordinates[2])] + dup_seg = [i for i in segments if 'INS_' in i][0].split('_')[1].split(':') + dup_bp = [int(i) for i in dup_seg[1].split('-')] + inv_flag = len([i for i in segments if "INV_" in i]) + # inv_flag == 0 : no INV involved in the 
insertion + # inv_flag > 0: INV involved + structures = [] + breakpoints = [] + if inv_flag == 0: + if del_bp[2] < dup_bp[0]: + breakpoints = del_bp + dup_bp + if del_bp[2] - del_bp[1] > small_sv_size_threshold: + structures = ['abc', 'cbc'] + else: + structures = ['ab', 'bab'] + elif del_bp[1] > dup_bp[1]: + breakpoints = [del_bp[0]] + dup_bp + del_bp[1:] + if del_bp[2] - del_bp[1] > small_sv_size_threshold: + structures = ['abc', 'aba'] + else: + structures = ['ab', 'aba'] + elif del_bp[1] < dup_bp[0] and not del_bp[2] < dup_bp[0]: + breakpoints = del_bp[:2] + dup_bp + structures = ['ab', 'bb'] + elif del_bp[1] < dup_bp[1] and not del_bp[2] < dup_bp[1]: + breakpoints = [del_bp[0]] + dup_bp + [del_bp[2]] + structures = ['ab', 'aa'] + elif inv_flag > 0: + if del_bp[2] < dup_bp[0]: + breakpoints = del_bp + dup_bp + if del_bp[2] - del_bp[1] > small_sv_size_threshold: + structures = ['abc', 'c^bc'] + else: + structures = ['ab', 'b^ab'] + elif del_bp[1] > dup_bp[1]: + breakpoints = [del_bp[0]] + dup_bp + del_bp[1:] + if del_bp[2] - del_bp[1] > small_sv_size_threshold: + structures = ['abc', 'aba^'] + else: + structures = ['ab', 'aba^'] + elif del_bp[1] < dup_bp[0] and not del_bp[2] < dup_bp[0]: + breakpoints = del_bp[:2] + dup_bp + structures = ['ab', 'b^b'] + elif del_bp[1] < dup_bp[1] and not del_bp[2] < dup_bp[1]: + breakpoints = [del_bp[0]] + dup_bp + [del_bp[2]] + structures = ['ab', 'aa^'] + return [breakpoints, structures] + + +def extract_bp_list_v4(coordinates, segments, small_sv_size_threshold): + del_bp = [coordinates[0], int(coordinates[1]), int(coordinates[2])] + dup_seg = [i for i in segments if 'INV_' in i][0].split('_')[1].split(':') + dup_bp = [int(i) for i in dup_seg[1].split('-')] + structures = [] + breakpoints = [] + if del_bp[2] < dup_bp[0]: + breakpoints = del_bp + dup_bp + if del_bp[2] - del_bp[1] > small_sv_size_threshold: + structures = ['abc', 'c^bc'] + else: + structures = ['ab', 'b^ab'] + elif del_bp[1] > dup_bp[1]: + breakpoints = [del_bp[0]] + dup_bp + del_bp[1:] + if del_bp[2] - del_bp[1] > small_sv_size_threshold: + structures = ['abc', 'aba^'] + else: + structures = ['ab', 'aba^'] + return [breakpoints, structures] + + +def is_interchromosomal(pin, header_pos): + chrom = pin[0] + if pin[header_pos['CPX_TYPE']] in ['dDUP', 'dDUP_iDEL']: + seg2 = [i for i in pin[header_pos['CPX_INTERVALS']].split(',') if 'DUP_' in i][0].split('_')[1].split(':') + if seg2[0] != chrom: + return True + elif pin[header_pos['CPX_TYPE']] in ['INS_iDEL']: + seg2 = [i for i in pin[header_pos['SOURCE']].split(',') if 'INS_' in i][0].split('_')[1].split(':') + if seg2[0] != chrom: + return True + elif pin[header_pos['CPX_TYPE']] in ['CTX_PQ/QP', 'CTX_PP/QQ'] or pin[header_pos['SVTYPE']] in ['CTX']: + return True + elif "INV_" in pin[header_pos['SOURCE']] and pin[header_pos['SVTYPE']] == "INS": + seg2 = [i for i in pin[header_pos['SOURCE']].split(',') if 'INV_' in i][0].split('_')[1].split(':') + seg2 = [seg2[0]] + seg2[1].split('-') + if seg2[0] != chrom: + return True + return False + + +def cpx_sv_readin(input_bed, header_pos, unresolved): + unresolved_svids = [] + fin = os.popen(r'''zcat %s''' % input_bed) + out = [] + small_sv_size_threshold = 250 + ref_alt = [] + for line in fin: + pin = line.strip().split('\t') + # label pidups as unresolved since manual inspection indicated low quality + if not pin[0][0] == "#": + if pin[header_pos['CPX_TYPE']] in ['piDUP_RF', 'piDUP_FR']: + unresolved_svids.append(pin[header_pos['name']]) + elif not is_interchromosomal(pin, header_pos): + if 
pin[header_pos['CPX_TYPE']] in ['delINV', 'INVdel', 'dupINV', 'INVdup', 'delINVdel', 'delINVdup', + 'dupINVdel', 'dupINVdup']: + segments = pin[header_pos['CPX_INTERVALS']].split(',') + breakpoints = extract_bp_list_for_inv_cnv_events(segments, pin[header_pos['CPX_TYPE']]) + if pin[header_pos['CPX_TYPE']] == 'delINV': + ref_alt = ['ab', 'b^'] + if pin[header_pos['CPX_TYPE']] == 'INVdel': + ref_alt = ['ab', 'a^'] + if pin[header_pos['CPX_TYPE']] == 'dupINV': + ref_alt = ['ab', 'aba^'] + if pin[header_pos['CPX_TYPE']] == 'INVdup': + ref_alt = ['ab', 'b^ab'] + if pin[header_pos['CPX_TYPE']] == 'delINVdel': + ref_alt = ['abc', 'b^'] + if pin[header_pos['CPX_TYPE']] == 'delINVdup': + ref_alt = ['abc', 'c^bc'] + if pin[header_pos['CPX_TYPE']] == 'dupINVdel': + ref_alt = ['abc', 'aba^'] + if pin[header_pos['CPX_TYPE']] == 'dupINVdup': + ref_alt = ['abc', 'ac^b^a^c'] + elif pin[header_pos['CPX_TYPE']] in ['dDUP', 'dDUP_iDEL']: + segments = pin[header_pos['CPX_INTERVALS']].split(',') + cpx_info = extract_bp_list_for_ddup_events(pin[:3], segments, small_sv_size_threshold) + if cpx_info == 'unresolved': + unresolved_svids.append(pin[header_pos['name']]) + continue + ref_alt = cpx_info[1] + breakpoints = cpx_info[0] + elif pin[header_pos['CPX_TYPE']] in ['INS_iDEL']: + segments = pin[header_pos['SOURCE']].split(',') + cpx_info = extract_bp_list_for_ins_idel(pin[:3], segments, small_sv_size_threshold) + ref_alt = cpx_info[1] + breakpoints = cpx_info[0] + else: + segments = pin[header_pos['SOURCE']].split(',') + cpx_info = extract_bp_list_v4(pin[:3], segments, small_sv_size_threshold) + ref_alt = cpx_info[1] + breakpoints = cpx_info[0] + out.append([breakpoints, ref_alt, pin[3]]) + else: + continue + fin.close() + with open(unresolved, 'w') as unres: + for svid in unresolved_svids: + unres.write(svid + "\n") + return out + + +def cpx_inter_chromo_sv_readin(input_bed, header_pos): + fin = os.popen(r'''zcat %s''' % input_bed) + out = [] + chr_list = ['chr' + str(i) for i in range(1, 23)] + ['chrX', 'chrY'] + bp = [] + ref_alt = [] + for line in fin: + pin = line.strip().split('\t') + if not pin[0][0] == "#": + if is_interchromosomal(pin, header_pos): + if pin[header_pos['CPX_TYPE']] in ['dDUP', 'dDUP_iDEL']: + seg1 = pin[:3] + seg2 = [i for i in pin[header_pos['CPX_INTERVALS']].split(',') if 'DUP_' in i][0].split('_')[1].split(':') + seg2 = [seg2[0]] + seg2[1].split('-') + if chr_list.index(seg1[0]) < chr_list.index(seg2[0]): + bp = [[seg1[0]] + [int(i) for i in seg1[1:]], [seg2[0]] + [int(i) for i in seg2[1:]]] + if int(seg1[2]) - int(seg1[1]) > 250: + ref_alt = ['ab_c', 'cb_c'] + else: + ref_alt = ['a_b', 'ba_b'] + elif chr_list.index(seg1[0]) > chr_list.index(seg2[0]): + bp = [[seg2[0]] + [int(i) for i in seg2[1:]], [seg1[0]] + [int(i) for i in seg1[1:]]] + if int(seg1[2]) - int(seg1[1]) > 250: + ref_alt = ['a_bc', 'a_ba'] + else: + ref_alt = ['a_b', 'a_ba'] + elif pin[header_pos['CPX_TYPE']] in ['INS_iDEL']: + seg1 = pin[:3] + seg2 = [i for i in pin[header_pos['SOURCE']].split(',') if 'INS_' in i][0].split('_')[1].split(':') + seg2 = [seg2[0]] + seg2[1].split('-') + if chr_list.index(seg1[0]) < chr_list.index(seg2[0]): + bp = [[seg1[0]] + [int(i) for i in seg1[1:]], [seg2[0]] + [int(i) for i in seg2[1:]]] + if int(seg1[2]) - int(seg1[1]) > 250: + ref_alt = ['ab_c', 'cb_c'] + else: + ref_alt = ['a_b', 'ba_b'] + elif chr_list.index(seg1[0]) > chr_list.index(seg2[0]): + bp = [[seg2[0]] + [int(i) for i in seg2[1:]], [seg1[0]] + [int(i) for i in seg1[1:]]] + if int(seg1[2]) - int(seg1[1]) > 250: + ref_alt 
= ['a_bc', 'a_ba'] + else: + ref_alt = ['a_b', 'a_ba'] + elif pin[header_pos['CPX_TYPE']] in ['CTX_PQ/QP', 'CTX_PP/QQ'] or pin[header_pos['SVTYPE']] in ['CTX']: + seg1 = pin[:3] + seg2 = [pin[header_pos['CHR2']], pin[header_pos['END2']], pin[header_pos['END2']]] + if chr_list.index(seg1[0]) < chr_list.index(seg2[0]): + bp = [[seg1[0]] + [int(i) for i in seg1[1:]], [seg2[0]] + [int(i) for i in seg2[1:]]] + elif chr_list.index(seg1[0]) > chr_list.index(seg2[0]): + bp = [[seg2[0]] + [int(i) for i in seg2[1:]], [seg1[0]] + [int(i) for i in seg1[1:]]] + ref_alt = [pin[header_pos['CPX_TYPE']]] + elif "INV_" in pin[header_pos['SOURCE']] and pin[header_pos['SVTYPE']] == "INS": + seg1 = pin[:3] + seg2 = [i for i in pin[header_pos['SOURCE']].split(',') if 'INV_' in i][0].split('_')[1].split(':') + seg2 = [seg2[0]] + seg2[1].split('-') + if chr_list.index(seg1[0]) < chr_list.index(seg2[0]): + bp = [[seg1[0]] + [int(i) for i in seg1[1:]], [seg2[0]] + [int(i) for i in seg2[1:]]] + if int(seg1[2]) - int(seg1[1]) > 250: + ref_alt = ['ab_c', 'c^b_c'] + else: + ref_alt = ['a_b', 'b^a_b'] + elif chr_list.index(seg1[0]) > chr_list.index(seg2[0]): + bp = [[seg2[0]] + [int(i) for i in seg2[1:]], [seg1[0]] + [int(i) for i in seg1[1:]]] + if int(seg1[2]) - int(seg1[1]) > 250: + ref_alt = ['a_bc', 'a_ba^'] + else: + ref_alt = ['a_b', 'a_ba^'] + out.append([bp, ref_alt, pin[3]]) + fin.close() + return out + + +def cpx_sample_batch_readin(cpx_sv, svid_sample, sample_pe, pe_evidence, out_file): + out = [] + flank_back = 1000 + flank_front = 100 + fo = open(out_file, 'w') + for info in cpx_sv: + breakpoints = info[0] + if info[1][0] == 'ab' and info[1][1] == 'b^': # delINV + common_1 = ['tabix', 'PE_metrics', breakpoints[0] + ':' + str(breakpoints[1] - flank_back) + '-' + str(breakpoints[1] + flank_front), '| grep', 'sample', '| awk', "'{if ($1==$4", '&&', '$3=="+" && $6=="+"', '&&', '$5>' + str(breakpoints[3] - flank_back), '&&', '$5<' + str(breakpoints[3] + flank_front), ") print}' | sed -e 's/$/\\t", info[2], "/'", '>>', pe_evidence] + common_2 = ['tabix', 'PE_metrics', breakpoints[0] + ':' + str(breakpoints[2] - flank_front) + '-' + str(breakpoints[2] + flank_back), '| grep', 'sample', '| awk', "'{if ($1==$4", '&&', '$3=="-" && $6=="-"', '&&', '$5>' + str(breakpoints[3] - flank_front), '&&', '$5<' + str(breakpoints[3] + flank_back), ") print}' | sed -e 's/$/\\t", info[2], "/'", '>>', pe_evidence] + elif info[1][0] == 'abc' and info[1][1] == 'b^': # delINVdel + common_1 = ['tabix', 'PE_metrics', breakpoints[0] + ':' + str(breakpoints[1] - flank_back) + '-' + str(breakpoints[1] + flank_front), '| grep', 'sample', '| awk', "'{if ($1==$4", '&&', '$3=="+" && $6=="+"', '&&', '$5>' + str(breakpoints[3] - flank_back), '&&', '$5<' + str(breakpoints[3] + flank_front), ") print}' | sed -e 's/$/\\t", info[2], "/'", '>>', pe_evidence] + common_2 = ['tabix', 'PE_metrics', breakpoints[0] + ':' + str(breakpoints[2] - flank_front) + '-' + str(breakpoints[2] + flank_back), '| grep', 'sample', '| awk', "'{if ($1==$4", '&&', '$3=="-" && $6=="-"', '&&', '$5>' + str(breakpoints[4] - flank_front), '&&', '$5<' + str(breakpoints[4] + flank_back), ") print}' | sed -e 's/$/\\t", info[2], "/'", '>>', pe_evidence] + elif info[1][0] == 'abc' and info[1][1] == 'c^bc': # delINVdup + common_1 = ['tabix', 'PE_metrics', breakpoints[0] + ':' + str(breakpoints[1] - flank_back) + '-' + str(breakpoints[1] + flank_front), '| grep', 'sample', '| awk', "'{if ($1==$4", '&&', '$3=="+" && $6=="+"', '&&', '$5>' + str(breakpoints[4] - flank_back), '&&', '$5<' 
+ str(breakpoints[4] + flank_front), ") print}' | sed -e 's/$/\\t", info[2], "/'", '>>', pe_evidence] + common_2 = ['tabix', 'PE_metrics', breakpoints[0] + ':' + str(breakpoints[2] - flank_front) + '-' + str(breakpoints[2] + flank_back), '| grep', 'sample', '| awk', "'{if ($1==$4", '&&', '$3=="-" && $6=="-"', '&&', '$5>' + str(breakpoints[3] - flank_front), '&&', '$5<' + str(breakpoints[3] + flank_back), ") print}' | sed -e 's/$/\\t", info[2], "/'", '>>', pe_evidence] + elif info[1][0] == 'ab' and info[1][1] == 'aba^': # dupINV + common_1 = ['tabix', 'PE_metrics', breakpoints[0] + ':' + str(breakpoints[2] - flank_back) + '-' + str(breakpoints[2] + flank_front), '| grep', 'sample', '| awk', "'{if ($1==$4", '&&', '$3=="+" && $6=="+"', '&&', '$5>' + str(breakpoints[3] - flank_back), '&&', '$5<' + str(breakpoints[3] + flank_front), ") print}' | sed -e 's/$/\\t", info[2], "/'", '>>', pe_evidence] + common_2 = ['tabix', 'PE_metrics', breakpoints[0] + ':' + str(breakpoints[1] - flank_front) + '-' + str(breakpoints[1] + flank_back), '| grep', 'sample', '| awk', "'{if ($1==$4", '&&', '$3=="-" && $6=="-"', '&&', '$5>' + str(breakpoints[3] - flank_front), '&&', '$5<' + str(breakpoints[3] + flank_back), ") print}' | sed -e 's/$/\\t", info[2], "/'", '>>', pe_evidence] + elif info[1][0] == 'abc' and info[1][1] == 'ac^b^a^c': # dupINVdup + common_1 = ['tabix', 'PE_metrics', breakpoints[0] + ':' + str(breakpoints[2] - flank_back) + '-' + str(breakpoints[2] + flank_front), '| grep', 'sample', '| awk', "'{if ($1==$4", '&&', '$3=="+" && $6=="+"', '&&', '$5>' + str(breakpoints[4] - flank_back), '&&', '$5<' + str(breakpoints[4] + flank_front), ") print}' | sed -e 's/$/\\t", info[2], "/'", '>>', pe_evidence] + common_2 = ['tabix', 'PE_metrics', breakpoints[0] + ':' + str(breakpoints[1] - flank_front) + '-' + str(breakpoints[1] + flank_back), '| grep', 'sample', '| awk', "'{if ($1==$4", '&&', '$3=="-" && $6=="-"', '&&', '$5>' + str(breakpoints[3] - flank_front), '&&', '$5<' + str(breakpoints[3] + flank_back), ") print}' | sed -e 's/$/\\t", info[2], "/'", '>>', pe_evidence] + elif info[1][0] == 'abc' and info[1][1] == 'aba^': # dupINVdel + common_1 = ['tabix', 'PE_metrics', breakpoints[0] + ':' + str(breakpoints[2] - flank_back) + '-' + str(breakpoints[2] + flank_front), '| grep', 'sample', '| awk', "'{if ($1==$4", '&&', '$3=="+" && $6=="+"', '&&', '$5>' + str(breakpoints[3] - flank_back), '&&', '$5<' + str(breakpoints[3] + flank_front), ") print}' | sed -e 's/$/\\t", info[2], "/'", '>>', pe_evidence] + common_2 = ['tabix', 'PE_metrics', breakpoints[0] + ':' + str(breakpoints[1] - flank_front) + '-' + str(breakpoints[1] + flank_back), '| grep', 'sample', '| awk', "'{if ($1==$4", '&&', '$3=="-" && $6=="-"', '&&', '$5>' + str(breakpoints[4] - flank_front), '&&', '$5<' + str(breakpoints[4] + flank_back), ") print}' | sed -e 's/$/\\t", info[2], "/'", '>>', pe_evidence] + elif info[1][0] == 'ab' and info[1][1] == 'a^': # INVdel + common_1 = ['tabix', 'PE_metrics', breakpoints[0] + ':' + str(breakpoints[1] - flank_back) + '-' + str(breakpoints[1] + flank_front), '| grep', 'sample', '| awk', "'{if ($1==$4", '&&', '$3=="+" && $6=="+"', '&&', '$5>' + str(breakpoints[2] - flank_back), '&&', '$5<' + str(breakpoints[2] + flank_front), ") print}' | sed -e 's/$/\\t", info[2], "/'", '>>', pe_evidence] + common_2 = ['tabix', 'PE_metrics', breakpoints[0] + ':' + str(breakpoints[1] - flank_front) + '-' + str(breakpoints[1] + flank_back), '| grep', 'sample', '| awk', "'{if ($1==$4", '&&', '$3=="-" && $6=="-"', '&&', '$5>' + 
str(breakpoints[3] - flank_front), '&&', '$5<' + str(breakpoints[3] + flank_back), ") print}' | sed -e 's/$/\\t", info[2], "/'", '>>', pe_evidence] + elif info[1][0] == 'ab' and info[1][1] == 'b^ab': # INVdup + common_1 = ['tabix', 'PE_metrics', breakpoints[0] + ':' + str(breakpoints[1] - flank_back) + '-' + str(breakpoints[1] + flank_front), '| grep', 'sample', '| awk', "'{if ($1==$4", '&&', '$3=="+" && $6=="+"', '&&', '$5>' + str(breakpoints[3] - flank_back), '&&', '$5<' + str(breakpoints[3] + flank_front), ") print}' | sed -e 's/$/\\t", info[2], "/'", '>>', pe_evidence] + common_2 = ['tabix', 'PE_metrics', breakpoints[0] + ':' + str(breakpoints[1] - flank_front) + '-' + str(breakpoints[1] + flank_back), '| grep', 'sample', '| awk', "'{if ($1==$4", '&&', '$3=="-" && $6=="-"', '&&', '$5>' + str(breakpoints[2] - flank_front), '&&', '$5<' + str(breakpoints[2] + flank_back), ") print}' | sed -e 's/$/\\t", info[2], "/'", '>>', pe_evidence] + elif info[1][0] == 'ab' and info[1][1] == 'aba': # dupINS + common_1 = ['tabix', 'PE_metrics', breakpoints[0] + ':' + str(breakpoints[2] - flank_back) + '-' + str(breakpoints[2] + flank_front), '| grep', 'sample', '| awk', "'{if ($1==$4", '&&', '$3=="+" && $6=="-"', '&&', '$5>' + str(breakpoints[3] - flank_front), '&&', '$5<' + str(breakpoints[3] + flank_back), ") print}' | sed -e 's/$/\\t", info[2], "/'", '>>', pe_evidence] + common_2 = ['tabix', 'PE_metrics', breakpoints[0] + ':' + str(breakpoints[1] - flank_front) + '-' + str(breakpoints[1] + flank_back), '| grep', 'sample', '| awk', "'{if ($1==$4", '&&', '$3=="-" && $6=="+"', '&&', '$5>' + str(breakpoints[3] - flank_back), '&&', '$5<' + str(breakpoints[3] + flank_front), ") print}' | sed -e 's/$/\\t", info[2], "/'", '>>', pe_evidence] + elif info[1][0] == 'abc' and info[1][1] == 'aba': # dupINSdel + common_1 = ['tabix', 'PE_metrics', breakpoints[0] + ':' + str(breakpoints[2] - flank_back) + '-' + str(breakpoints[2] + flank_front), '| grep', 'sample', '| awk', "'{if ($1==$4", '&&', '$3=="+" && $6=="-"', '&&', '$5>' + str(breakpoints[4] - flank_front), '&&', '$5<' + str(breakpoints[4] + flank_back), ") print}' | sed -e 's/$/\\t", info[2], "/'", '>>', pe_evidence] + common_2 = ['tabix', 'PE_metrics', breakpoints[0] + ':' + str(breakpoints[1] - flank_front) + '-' + str(breakpoints[1] + flank_back), '| grep', 'sample', '| awk', "'{if ($1==$4", '&&', '$3=="-" && $6=="+"', '&&', '$5>' + str(breakpoints[3] - flank_back), '&&', '$5<' + str(breakpoints[3] + flank_front), ") print}' | sed -e 's/$/\\t", info[2], "/'", '>>', pe_evidence] + elif info[1][0] == 'ab' and info[1][1] == 'aa': # dupdel + common_1 = ['tabix', 'PE_metrics', breakpoints[0] + ':' + str(breakpoints[2] - flank_back) + '-' + str(breakpoints[2] + flank_front), '| grep', 'sample', '| awk', "'{if ($1==$4", '&&', '$3=="+" && $6=="-"', '&&', '$5>' + str(breakpoints[3] - flank_front), '&&', '$5<' + str(breakpoints[3] + flank_back), ") print}' | sed -e 's/$/\\t", info[2], "/'", '>>', pe_evidence] + common_2 = ['tabix', 'PE_metrics', breakpoints[0] + ':' + str(breakpoints[1] - flank_front) + '-' + str(breakpoints[1] + flank_back), '| grep', 'sample', '| awk', "'{if ($1==$4", '&&', '$3=="-" && $6=="+"', '&&', '$5>' + str(breakpoints[2] - flank_back), '&&', '$5<' + str(breakpoints[2] + flank_front), ") print}' | sed -e 's/$/\\t", info[2], "/'", '>>', pe_evidence] + elif info[1][0] == 'ab' and info[1][1] == 'bab': # INSdup + common_1 = ['tabix', 'PE_metrics', breakpoints[0] + ':' + str(breakpoints[1] - flank_back) + '-' + str(breakpoints[1] + 
flank_front), '| grep', 'sample', '| awk', "'{if ($1==$4", '&&', '$3=="+" && $6=="-"', '&&', '$5>' + str(breakpoints[2] - flank_front), '&&', '$5<' + str(breakpoints[2] + flank_back), ") print}' | sed -e 's/$/\\t", info[2], "/'", '>>', pe_evidence] + common_2 = ['tabix', 'PE_metrics', breakpoints[0] + ':' + str(breakpoints[1] - flank_front) + '-' + str(breakpoints[1] + flank_back), '| grep', 'sample', '| awk', "'{if ($1==$4", '&&', '$3=="-" && $6=="+"', '&&', '$5>' + str(breakpoints[3] - flank_back), '&&', '$5<' + str(breakpoints[3] + flank_front), ") print}' | sed -e 's/$/\\t", info[2], "/'", '>>', pe_evidence] + elif info[1][0] == 'abc' and info[1][1] == 'cbc': # delINSdup + common_1 = ['tabix', 'PE_metrics', breakpoints[0] + ':' + str(breakpoints[1] - flank_back) + '-' + str(breakpoints[1] + flank_front), '| grep', 'sample', '| awk', "'{if ($1==$4", '&&', '$3=="+" && $6=="-"', '&&', '$5>' + str(breakpoints[3] - flank_front), '&&', '$5<' + str(breakpoints[3] + flank_back), ") print}' | sed -e 's/$/\\t", info[2], "/'", '>>', pe_evidence] + common_2 = ['tabix', 'PE_metrics', breakpoints[0] + ':' + str(breakpoints[2] - flank_front) + '-' + str(breakpoints[2] + flank_back), '| grep', 'sample', '| awk', "'{if ($1==$4", '&&', '$3=="-" && $6=="+"', '&&', '$5>' + str(breakpoints[4] - flank_back), '&&', '$5<' + str(breakpoints[4] + flank_front), ") print}' | sed -e 's/$/\\t", info[2], "/'", '>>', pe_evidence] + elif info[1][0] == 'ab' and info[1][1] == 'bb': # deldup + common_1 = ['tabix', 'PE_metrics', breakpoints[0] + ':' + str(breakpoints[1] - flank_back) + '-' + str(breakpoints[1] + flank_front), '| grep', 'sample', '| awk', "'{if ($1==$4", '&&', '$3=="+" && $6=="-"', '&&', '$5>' + str(breakpoints[2] - flank_front), '&&', '$5<' + str(breakpoints[2] + flank_back), ") print}' | sed -e 's/$/\\t", info[2], "/'", '>>', pe_evidence] + common_2 = ['tabix', 'PE_metrics', breakpoints[0] + ':' + str(breakpoints[2] - flank_front) + '-' + str(breakpoints[2] + flank_back), '| grep', 'sample', '| awk', "'{if ($1==$4", '&&', '$3=="-" && $6=="+"', '&&', '$5>' + str(breakpoints[3] - flank_back), '&&', '$5<' + str(breakpoints[3] + flank_front), ") print}' | sed -e 's/$/\\t", info[2], "/'", '>>', pe_evidence] + elif info[1] == ['a_b', 'ba_b'] or info[1] == ['ab_c', 'cb_c']: # ddup or ddup_iDEL, insertion from the smaller chromosome to the larger chromosome + common_1 = ['tabix', 'PE_metrics', breakpoints[0][0] + ':' + str(breakpoints[0][1] - flank_back) + '-' + str(breakpoints[0][1] + flank_front), '| grep', 'sample', '| awk', "'{if (", '$3=="+" && $6=="-"', '&&', '$4=="' + breakpoints[1][0] + '" &&', '$5>' + str(breakpoints[1][1] - flank_front), '&&', '$5<' + str(breakpoints[1][1] + flank_back), ") print}' | sed -e 's/$/\\t", info[2], "/'", '>>', pe_evidence] + common_2 = ['tabix', 'PE_metrics', breakpoints[0][0] + ':' + str(breakpoints[0][2] - flank_front) + '-' + str(breakpoints[0][2] + flank_back), '| grep', 'sample', '| awk', "'{if (", '$3=="-" && $6=="+"', '&&', '$4=="' + breakpoints[1][0] + '" &&', '$5>' + str(breakpoints[1][2] - flank_back), '&&', '$5<' + str(breakpoints[1][2] + flank_front), ") print}' | sed -e 's/$/\\t", info[2], "/'", '>>', pe_evidence] + elif info[1] == ['a_b', 'a_ba'] or info[1] == ['a_bc', 'a_ba']: # ddup or ddup_iDEL, insertion from the larger chromosome to the smaller chromosome + common_1 = ['tabix', 'PE_metrics', breakpoints[0][0] + ':' + str(breakpoints[0][1] - flank_front) + '-' + str(breakpoints[0][1] + flank_back), '| grep', 'sample', '| awk', "'{if (", '$3=="-" && 
$6=="+"', '&&', '$4=="' + breakpoints[1][0] + '" &&', '$5>' + str(breakpoints[1][1] - flank_back), '&&', '$5<' + str(breakpoints[1][1] + flank_front), ") print}' | sed -e 's/$/\\t", info[2], "/'", '>>', pe_evidence] + common_2 = ['tabix', 'PE_metrics', breakpoints[0][0] + ':' + str(breakpoints[0][2] - flank_back) + '-' + str(breakpoints[0][2] + flank_front), '| grep', 'sample', '| awk', "'{if (", '$3=="+" && $6=="-"', '&&', '$4=="' + breakpoints[1][0] + '" &&', '$5>' + str(breakpoints[1][2] - flank_front), '&&', '$5<' + str(breakpoints[1][2] + flank_back), ") print}' | sed -e 's/$/\\t", info[2], "/'", '>>', pe_evidence] + elif info[1] == ['a_b', 'b^a_b'] or info[1] == ['ab_c', 'c^b_c']: # inverted insertion, insertion from the smaller chromosome to the larger chromosome + common_1 = ['tabix', 'PE_metrics', breakpoints[0][0] + ':' + str(breakpoints[0][1] - flank_back) + '-' + str(breakpoints[0][1] + flank_front), '| grep', 'sample', '| awk', "'{if (", '$3=="+" && $6=="+"', '&&', '$4=="' + breakpoints[1][0] + '" &&', '$5>' + str(breakpoints[1][2] - flank_back), '&&', '$5<' + str(breakpoints[1][2] + flank_front), ") print}' | sed -e 's/$/\\t", info[2], "/'", '>>', pe_evidence] + common_2 = ['tabix', 'PE_metrics', breakpoints[0][0] + ':' + str(breakpoints[0][2] - flank_front) + '-' + str(breakpoints[0][2] + flank_back), '| grep', 'sample', '| awk', "'{if (", '$3=="-" && $6=="-"', '&&', '$4=="' + breakpoints[1][0] + '" &&', '$5>' + str(breakpoints[1][1] - flank_front), '&&', '$5<' + str(breakpoints[1][1] + flank_back), ") print}' | sed -e 's/$/\\t", info[2], "/'", '>>', pe_evidence] + elif info[1] == ['a_b', 'a_ba^'] or info[1] == ['a_bc', 'a_ba^']: # inverted insertion, insertion from the larger chromosome to the smaller chromosome + common_1 = ['tabix', 'PE_metrics', breakpoints[0][0] + ':' + str(breakpoints[0][1] - flank_front) + '-' + str(breakpoints[0][1] + flank_back), '| grep', 'sample', '| awk', "'{if (", '$3=="-" && $6=="-"', '&&', '$4=="' + breakpoints[1][0] + '" &&', '$5>' + str(breakpoints[1][2] - flank_front), '&&', '$5<' + str(breakpoints[1][2] + flank_back), ") print}' | sed -e 's/$/\\t", info[2], "/'", '>>', pe_evidence] + common_2 = ['tabix', 'PE_metrics', breakpoints[0][0] + ':' + str(breakpoints[0][2] - flank_back) + '-' + str(breakpoints[0][2] + flank_front), '| grep', 'sample', '| awk', "'{if (", '$3=="+" && $6=="+"', '&&', '$4=="' + breakpoints[1][0] + '" &&', '$5>' + str(breakpoints[1][1] - flank_back), '&&', '$5<' + str(breakpoints[1][1] + flank_front), ") print}' | sed -e 's/$/\\t", info[2], "/'", '>>', pe_evidence] + elif info[1] == ['CTX_PQ/QP']: # inverted insertion, insertion from the larger chromosome to the smaller chromosome + common_1 = ['tabix', 'PE_metrics', breakpoints[0][0] + ':' + str(breakpoints[0][1] - flank_back) + '-' + str(breakpoints[0][1] + flank_front), '| grep', 'sample', '| awk', "'{if (", '$3=="+" && $6=="+"', '&&', '$4=="' + breakpoints[1][0] + '" &&', '$5>' + str(breakpoints[1][1] - flank_back), '&&', '$5<' + str(breakpoints[1][1] + flank_front), ") print}' | sed -e 's/$/\\t", info[2], "/'", '>>', pe_evidence] + common_2 = ['tabix', 'PE_metrics', breakpoints[0][0] + ':' + str(breakpoints[0][2] - flank_front) + '-' + str(breakpoints[0][2] + flank_back), '| grep', 'sample', '| awk', "'{if (", '$3=="-" && $6=="-"', '&&', '$4=="' + breakpoints[1][0] + '" &&', '$5>' + str(breakpoints[1][2] - flank_front), '&&', '$5<' + str(breakpoints[1][2] + flank_back), ") print}' | sed -e 's/$/\\t", info[2], "/'", '>>', pe_evidence] + elif info[1] == 
['CTX_PP/QQ']: # CTX_PP/QQ translocation + common_1 = ['tabix', 'PE_metrics', breakpoints[0][0] + ':' + str(breakpoints[0][1] - flank_back) + '-' + str(breakpoints[0][1] + flank_front), '| grep', 'sample', '| awk', "'{if (", '$3=="+" && $6=="-"', '&&', '$4=="' + breakpoints[1][0] + '" &&', '$5>' + str(breakpoints[1][1] - flank_front), '&&', '$5<' + str(breakpoints[1][1] + flank_back), ") print}' | sed -e 's/$/\\t", info[2], "/'", '>>', pe_evidence] + common_2 = ['tabix', 'PE_metrics', breakpoints[0][0] + ':' + str(breakpoints[0][2] - flank_front) + '-' + str(breakpoints[0][2] + flank_back), '| grep', 'sample', '| awk', "'{if (", '$3=="-" && $6=="+"', '&&', '$4=="' + breakpoints[1][0] + '" &&', '$5>' + str(breakpoints[1][2] - flank_back), '&&', '$5<' + str(breakpoints[1][2] + flank_front), ") print}' | sed -e 's/$/\\t", info[2], "/'", '>>', pe_evidence] + else: + print(info) + common_1 = None + common_2 = None + samples = svid_sample[info[2]] + if not samples == '' and not samples == 'NA': + sample_list = samples.split(',') + pe_metrics_list = [sample_pe[samp] for samp in sample_list] + for num in range(len(sample_list)): + common_1[1] = pe_metrics_list[num] + common_2[1] = pe_metrics_list[num] + common_1[4] = sample_list[num] + common_2[4] = sample_list[num] + write_1 = ' '.join(common_1) + write_2 = ' '.join(common_2) + print(write_1, file=fo) + print(write_2, file=fo) + + fo.close() + return out + + +def write_cpx_command(cpx_command, out_file): + fo = open(out_file, 'w') + for i in cpx_command: + print(i, file=fo) + fo.close() + + +def write_cpx_svs(cpx_sv, out_file): + fo = open(out_file, 'w') + for i in cpx_sv: + out = [i[2], ':'.join([str(j) for j in i[0]])] + i[1] + print('\t'.join(out), file=fo) + fo.close() + + +def main(): + """ + Command-line main block + """ + + # Parse command line arguments and options + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter) + parser.add_argument('-i', '--input', required=True, help='input file of CPX events in bed format') + parser.add_argument('-s', '--sample_pe', required=True, help='2 column file with sample ID and PE metrics information') + parser.add_argument('-p', '--pe_evidence', required=True, help='name of file to store collected PE metrics') + parser.add_argument('-c', '--command_script', required=True, help='name of file that has scripts to collect PE evidences') + parser.add_argument('-r', '--reformat_SV', required=True, help='reformatted SV in svelter format') + parser.add_argument('-u', '--unresolved', required=True, help='list of SVIDs to mark unresolved without evaluating (temporary workaround)') # TODO + args = parser.parse_args() + + input_bed = args.input + sample_pe_file = args.sample_pe + pe_evidence = args.pe_evidence + command_script = args.command_script + reformatted_sv = args.reformat_SV + + header_pos = header_pos_readin(input_bed) + sample_pe = sample_pe_readin(sample_pe_file) + svid_sample = svid_sample_readin(input_bed, header_pos) + + cpx_sv = cpx_sv_readin(input_bed, header_pos, args.unresolved) + cpx_inter_chromo_sv = cpx_inter_chromo_sv_readin(input_bed, header_pos) + write_cpx_svs(cpx_sv + cpx_inter_chromo_sv, reformatted_sv) + + cpx_sample_batch_readin(cpx_sv + cpx_inter_chromo_sv, svid_sample, sample_pe, pe_evidence, command_script) + + +if __name__ == '__main__': + main() diff --git a/terra_pipeline_diagram.jpg b/terra_pipeline_diagram.jpg index e3af1c2f7..226f13c2c 100644 ---
a/terra_pipeline_diagram.jpg +++ b/terra_pipeline_diagram.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7312ab3c1e136b98f3721606ecec017a1a2f6787144aaaf12fe16ab00a9a6a53 -size 295450 +oid sha256:e7b763bd8386fb082264ae43a5ed9ca922b4dd9763f58e079d2028375fb8e8af +size 307785 diff --git a/wdl/CleanVcf.wdl b/wdl/CleanVcf.wdl index c86e88500..ab228ccf7 100644 --- a/wdl/CleanVcf.wdl +++ b/wdl/CleanVcf.wdl @@ -24,6 +24,9 @@ workflow CleanVcf { Int clean_vcf1b_records_per_shard Int clean_vcf5_records_per_shard + File HERVK_reference + File LINE1_reference + String chr_x String chr_y @@ -67,6 +70,7 @@ workflow CleanVcf { RuntimeAttr? runtime_override_stitch_fragmented_cnvs RuntimeAttr? runtime_override_final_cleanup RuntimeAttr? runtime_attr_format + RuntimeAttr? runtime_override_rescue_me_dels # Clean vcf 1b RuntimeAttr? runtime_attr_override_subset_large_cnvs_1b @@ -133,6 +137,8 @@ workflow CleanVcf { clean_vcf1b_records_per_shard=clean_vcf1b_records_per_shard, clean_vcf5_records_per_shard=clean_vcf5_records_per_shard, ploidy_table=CreatePloidyTableFromPed.out, + HERVK_reference=HERVK_reference, + LINE1_reference=LINE1_reference, chr_x=chr_x, chr_y=chr_y, linux_docker=linux_docker, @@ -170,6 +176,7 @@ workflow CleanVcf { runtime_override_concat_vcfs_1b=runtime_override_concat_vcfs_1b, runtime_override_cat_multi_cnvs_1b=runtime_override_cat_multi_cnvs_1b, runtime_attr_format=runtime_attr_format, + runtime_override_rescue_me_dels=runtime_override_rescue_me_dels } } diff --git a/wdl/CleanVcfChromosome.wdl b/wdl/CleanVcfChromosome.wdl index 29b3bcc8e..a14ffa8c4 100644 --- a/wdl/CleanVcfChromosome.wdl +++ b/wdl/CleanVcfChromosome.wdl @@ -25,6 +25,9 @@ workflow CleanVcfChromosome { File? outlier_samples_list Int? max_samples_per_shard_step3 + File HERVK_reference + File LINE1_reference + File ploidy_table String chr_x String chr_y @@ -49,6 +52,7 @@ workflow CleanVcfChromosome { RuntimeAttr? runtime_override_clean_vcf_5_polish RuntimeAttr? runtime_override_stitch_fragmented_cnvs RuntimeAttr? runtime_override_final_cleanup + RuntimeAttr? runtime_override_rescue_me_dels # Clean vcf 1b RuntimeAttr? runtime_attr_override_subset_large_cnvs_1b @@ -285,9 +289,19 @@ workflow CleanVcfChromosome { runtime_attr_override=runtime_override_stitch_fragmented_cnvs } + call RescueMobileElementDeletions { + input: + vcf = StitchFragmentedCnvs.stitched_vcf_shard, + prefix = "~{prefix}.rescue_me_dels", + LINE1 = LINE1_reference, + HERVK = HERVK_reference, + sv_pipeline_docker = sv_pipeline_docker, + runtime_attr_override = runtime_override_rescue_me_dels + } + call FinalCleanup { input: - vcf=StitchFragmentedCnvs.stitched_vcf_shard, + vcf=RescueMobileElementDeletions.out, contig=contig, prefix="~{prefix}.final_cleanup", sv_pipeline_docker=sv_pipeline_docker, @@ -595,6 +609,99 @@ task CleanVcf4 { } } +task RescueMobileElementDeletions { + input { + File vcf + String prefix + File LINE1 + File HERVK + String sv_pipeline_docker + RuntimeAttr? 
runtime_attr_override + } + + Float input_size = size(vcf, "GiB") + RuntimeAttr runtime_default = object { + mem_gb: 3.75 + input_size * 1.5, + disk_gb: ceil(100.0 + input_size * 3.0), + cpu_cores: 1, + preemptible_tries: 3, + max_retries: 1, + boot_disk_gb: 10 + } + RuntimeAttr runtime_override = select_first([runtime_attr_override, runtime_default]) + runtime { + memory: "~{select_first([runtime_override.mem_gb, runtime_default.mem_gb])} GB" + disks: "local-disk ~{select_first([runtime_override.disk_gb, runtime_default.disk_gb])} HDD" + cpu: select_first([runtime_override.cpu_cores, runtime_default.cpu_cores]) + preemptible: select_first([runtime_override.preemptible_tries, runtime_default.preemptible_tries]) + maxRetries: select_first([runtime_override.max_retries, runtime_default.max_retries]) + docker: sv_pipeline_docker + bootDiskSizeGb: select_first([runtime_override.boot_disk_gb, runtime_default.boot_disk_gb]) + } + + command <<< + set -euo pipefail + + python <.5) print}' | cut -f4 | sed -e 's/$/\tDEL\tPASS\toverlap_LINE1/' > manual_revise.MEI_DEL_from_BND.SVID_SVTYPE_FILTER_INFO.tsv + bedtools coverage -wo -a ~{prefix}.bnd_del.bed.gz -b ~{HERVK} | awk '{if ($NF>.5) print}' | cut -f4 | sed -e 's/$/\tDEL\tPASS\toverlap_HERVK/' >> manual_revise.MEI_DEL_from_BND.SVID_SVTYPE_FILTER_INFO.tsv + + python <',) + if hash_MEI_DEL_reset[record.id] == 'overlap_HERVK': + record.alts = ('',) + fo.write(record) +fin.close() +fo.close() +CODE + >>> + + output { + File out = "~{prefix}.vcf.gz" + } +} + # Remove CNVs that are redundant with CPX events or other CNVs task DropRedundantCnvs { diff --git a/wdl/CollectLargeCNVSupportForCPX.wdl b/wdl/CollectLargeCNVSupportForCPX.wdl new file mode 100644 index 000000000..6c943238a --- /dev/null +++ b/wdl/CollectLargeCNVSupportForCPX.wdl @@ -0,0 +1,284 @@ +version 1.0 + +import "Structs.wdl" +import "TasksMakeCohortVcf.wdl" as MiniTasks + +workflow CollectLargeCNVSupportForCPX { + input { + File cpx_ctx_bed + String prefix + + File sample_batch_pe_map + Array[String] batch_name_list + Array[File] Depth_DEL_beds + Array[File] Depth_DUP_beds + + String sv_base_mini_docker + String sv_pipeline_docker + + RuntimeAttr? runtime_attr_generate_cnv_segments_from_cpx + RuntimeAttr? runtime_attr_extract_cpx_lg_cnv_by_batch + RuntimeAttr? runtime_attr_seek_depth_supp_for_cpx + RuntimeAttr? runtime_attr_concat_bed_Step1 + RuntimeAttr? 
runtime_attr_concat_bed_Step2 + + } + + call GenerateCnvSegmentFromCpx { + input: + bed = cpx_ctx_bed, #input file including cpx calls in bed format; + sample_batch_pe_map = sample_batch_pe_map, + sv_pipeline_docker = sv_pipeline_docker, + runtime_attr_override = runtime_attr_generate_cnv_segments_from_cpx + } + + scatter (i in range(length(batch_name_list))){ + call ExtractCpxLgCnvByBatch { + input: + bed_gz = GenerateCnvSegmentFromCpx.cpx_lg_cnv, + batch_name = batch_name_list[i], + sv_base_mini_docker = sv_base_mini_docker, + runtime_attr_override = runtime_attr_extract_cpx_lg_cnv_by_batch + } + + call SeekDepthSuppForCpx as seek_depth_supp_for_cpx_del { + input: + cpx_lg_cnv = ExtractCpxLgCnvByBatch.lg_cnv_del, + raw_depth_bed = Depth_DEL_beds[i], + sv_pipeline_docker = sv_pipeline_docker, + runtime_attr_override = runtime_attr_seek_depth_supp_for_cpx + } + + call SeekDepthSuppForCpx as seek_depth_supp_for_cpx_dup { + input: + cpx_lg_cnv = ExtractCpxLgCnvByBatch.lg_cnv_dup, + raw_depth_bed = Depth_DUP_beds[i], + sv_pipeline_docker = sv_pipeline_docker, + runtime_attr_override = runtime_attr_seek_depth_supp_for_cpx + } + + call MiniTasks.ConcatBeds as concat_beds_svtype { + input: + shard_bed_files = [seek_depth_supp_for_cpx_del.cpx_cnv_depth_supp, seek_depth_supp_for_cpx_dup.cpx_cnv_depth_supp], + prefix = "~{batch_name_list[i]}.lg_cnv.depth_supp", + sv_base_mini_docker = sv_base_mini_docker, + runtime_attr_override = runtime_attr_concat_bed_Step1 + } + } + + + call MiniTasks.ConcatBeds as concat_beds_across_batches { + input: + shard_bed_files = concat_beds_svtype.merged_bed_file, + prefix = "~{prefix}.lg_cnv.depth_supp", + sv_base_mini_docker = sv_base_mini_docker, + runtime_attr_override = runtime_attr_concat_bed_Step2 + } + + output { + File lg_cnv_depth_supp = concat_beds_across_batches.merged_bed_file + } +} + +task SeekDepthSuppForCpx { + input { + File cpx_lg_cnv + File raw_depth_bed + String sv_pipeline_docker + RuntimeAttr? 
runtime_attr_override + } + + RuntimeAttr default_attr = object { + cpu_cores: 1, + mem_gb: 3.75, + disk_gb: 10, + boot_disk_gb: 10, + preemptible_tries: 3, + max_retries: 1 + } + RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) + + String prefix = basename(cpx_lg_cnv, '.bed.gz') + + output { + File cpx_cnv_depth_supp = "~{prefix}.depth_supp.bed.gz" + } + command <<< + + set -e pipefail + + zcat ~{cpx_lg_cnv} | awk '{print $6}' | sort | uniq > sample_list.tsv + + echo -e '#chr\tpos\tend\tSVTYPE\tSVID\tsample\tbatch\tdepth_cov' > ~{prefix}.depth_supp.bed + + while read sample_name; do + zcat ~{cpx_lg_cnv} | awk -v sample="$sample_name" '{if ($6==sample) print}' > query.bed + zcat ~{raw_depth_bed} | awk -v sample="$sample_name" '{if ($5==sample) print}' > ref.bed + bedtools coverage -a query.bed -b ref.bed \ + | awk '{print $1,$2,$3,$4,$5,$6,$7,$NF}' \ + | sed -e 's/ /\t/g' \ + >> ~{prefix}.depth_supp.bed + done < sample_list.tsv + + bgzip ~{prefix}.depth_supp.bed + + >>> + runtime { + cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) + memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" + disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" + bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) + docker: sv_pipeline_docker + preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) + maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) + } +} + + +task ExtractCpxLgCnvByBatch { + input { + File bed_gz + String batch_name + String sv_base_mini_docker + RuntimeAttr? runtime_attr_override + } + + RuntimeAttr default_attr = object { + cpu_cores: 1, + mem_gb: 3.75, + disk_gb: 10, + boot_disk_gb: 10, + preemptible_tries: 3, + max_retries: 1 + } + RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) + + output { + File lg_cnv_del = "~{batch_name}.lg_cnv.DEL.bed.gz" + File lg_cnv_dup = "~{batch_name}.lg_cnv.DUP.bed.gz" + } + command <<< + + set -e pipefail + + zcat ~{bed_gz} | awk '{if ($4=="DEL" && $7=="~{batch_name}") print}' > ~{batch_name}.lg_cnv.DEL.bed + zcat ~{bed_gz} | awk '{if ($4=="DUP" && $7=="~{batch_name}") print}' > ~{batch_name}.lg_cnv.DUP.bed + bgzip ~{batch_name}.lg_cnv.DEL.bed + bgzip ~{batch_name}.lg_cnv.DUP.bed + + >>> + runtime { + cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) + memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" + disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" + bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) + docker: sv_base_mini_docker + preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) + maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) + } +} + +task GenerateCnvSegmentFromCpx { + input { + File bed + File sample_batch_pe_map + String sv_pipeline_docker + Int min_depth_size = 5000 + RuntimeAttr? 
runtime_attr_override + } + + RuntimeAttr default_attr = object { + cpu_cores: 1, + mem_gb: 3.75, + disk_gb: 10, + boot_disk_gb: 10, + preemptible_tries: 1, + max_retries: 1 + } + + RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) + + + String prefix = basename(bed, ".bed.gz") + + command <<< + set -euo pipefail + + python <0: + for i in interval: + if i[2]-i[1]>=min_depth_size: + sample_names = pin[pos_SAMPLES].split(',') + if i[3]=="DEL": + for j in sample_names: + CPX_CNV.append(i+[j, sample_to_batch[j]]) + if i[3]=="DUP": + for j in sample_names: + CPX_CNV.append(i+[j, sample_to_batch[j]]) + f_bed.close() + return CPX_CNV + + def write_cpx_cnv(CPX_CNV, fileout): + fo=open(fileout, 'w') + for info in CPX_CNV: + print('\t'.join([str(i) for i in info]), file=fo) + fo.close() + + bed_input = "~{bed}" + fileout = "~{prefix}.lg_CNV.bed" + min_depth_size = ~{min_depth_size} + sample_to_batch = get_sample_batch_map("~{sample_batch_pe_map}") + CPX_CNV = readin_cpx_cnv(bed_input, sample_to_batch, min_depth_size) + write_cpx_cnv(CPX_CNV, fileout) + CODE + + bgzip "~{prefix}.lg_CNV.bed" + >>> + + output { + File cpx_lg_cnv = "~{prefix}.lg_CNV.bed.gz" + } + + runtime { + cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) + memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" + disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" + bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) + docker: sv_pipeline_docker + preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) + maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) + } +} + diff --git a/wdl/CollectPEMetricsForCPX.wdl b/wdl/CollectPEMetricsForCPX.wdl new file mode 100644 index 000000000..9a6fbd0c1 --- /dev/null +++ b/wdl/CollectPEMetricsForCPX.wdl @@ -0,0 +1,102 @@ +version 1.0 + +import "Structs.wdl" +import "CollectPEMetricsPerBatchCPX.wdl" as per_batch + +workflow CollectPEMetricsForCPX { + input { + Array[String] batch_name_list + Array[File] PE_metrics + Array[File] PE_metrics_idxes + File PE_collect_script + String prefix + Int n_per_split + String sv_pipeline_docker + String sv_base_mini_docker + RuntimeAttr? runtime_attr_collect_pe + RuntimeAttr? runtime_attr_split_script + RuntimeAttr? runtime_attr_calcu_pe_stat + RuntimeAttr? 
runtime_attr_concat_evidence + } + + scatter (i in range(length(batch_name_list))) { + call per_batch.CollectPEMetricsPerBatchCPX { + input: + n_per_split = n_per_split, + prefix = "~{prefix}.~{batch_name_list[i]}", + batch_name = batch_name_list[i], + PE_metric = PE_metrics[i], + PE_metrics_idx = PE_metrics_idxes[i], + PE_collect_script = PE_collect_script, + sv_pipeline_docker = sv_pipeline_docker, + sv_base_mini_docker = sv_base_mini_docker, + runtime_attr_collect_pe = runtime_attr_collect_pe, + runtime_attr_split_script = runtime_attr_split_script, + runtime_attr_calcu_pe_stat = runtime_attr_calcu_pe_stat, + runtime_attr_concat_evidence = runtime_attr_concat_evidence + } + } + + call per_batch.ConcatEvidences { + input: + evidences = CollectPEMetricsPerBatchCPX.evidence, + prefix = prefix, + sv_base_mini_docker = sv_base_mini_docker, + runtime_attr_override = runtime_attr_concat_evidence + } + + call CalcuPEStat { + input: + evidence = ConcatEvidences.concat_evidence, + prefix = prefix, + sv_base_mini_docker = sv_base_mini_docker, + runtime_attr_override = runtime_attr_calcu_pe_stat + } + + output { + File evidence = ConcatEvidences.concat_evidence + File evi_stat = CalcuPEStat.evi_stat + } +} + + +task CalcuPEStat { + input { + File evidence + String prefix + String sv_base_mini_docker + RuntimeAttr? runtime_attr_override + } + + RuntimeAttr default_attr = object { + cpu_cores: 1, + mem_gb: 3.75, + disk_gb: 10, + boot_disk_gb: 10, + preemptible_tries: 3, + max_retries: 1 + } + + RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) + + command <<< + set -euo pipefail + + zcat ~{evidence} | cut -f3,6- | uniq -c > ~{prefix}.evi_stat + bgzip ~{prefix}.evi_stat + >>> + + output { + File evi_stat = "~{prefix}.evi_stat.gz" + } + + runtime { + cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) + memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" + disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" + bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) + docker: sv_base_mini_docker + preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) + maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) + } +} diff --git a/wdl/CollectPEMetricsPerBatchCPX.wdl b/wdl/CollectPEMetricsPerBatchCPX.wdl new file mode 100644 index 000000000..3cc65dbeb --- /dev/null +++ b/wdl/CollectPEMetricsPerBatchCPX.wdl @@ -0,0 +1,205 @@ +version 1.0 + +import "Structs.wdl" +workflow CollectPEMetricsPerBatchCPX { + input { + Int n_per_split + String batch_name + File PE_metric + File PE_metrics_idx + File PE_collect_script + String prefix + String sv_base_mini_docker + String sv_pipeline_docker + RuntimeAttr? runtime_attr_collect_pe + RuntimeAttr? runtime_attr_concat_evidence + RuntimeAttr? runtime_attr_calcu_pe_stat + RuntimeAttr? 
runtime_attr_split_script + } + + call SplitScripts { + input: + script = PE_collect_script, + n_per_split = n_per_split, + batch_name = batch_name, + sv_base_mini_docker = sv_base_mini_docker, + runtime_attr_override = runtime_attr_split_script + } + + scatter (script in SplitScripts.script_splits ) { + call CollectPEMetrics { + input: + batch_name = batch_name, + PE_metric = PE_metric, + PE_metrics_idx = PE_metrics_idx, + PE_collect_script = script, + sv_pipeline_docker = sv_pipeline_docker, + runtime_attr_override = runtime_attr_collect_pe + } + } + + call ConcatEvidences { + input: + evidences = CollectPEMetrics.evidence, + prefix = prefix, + sv_base_mini_docker = sv_base_mini_docker, + runtime_attr_override = runtime_attr_concat_evidence + } + + output { + File evidence = ConcatEvidences.concat_evidence + } +} + + + +# collect PE metrics +task CollectPEMetrics { + input { + String batch_name + File PE_metric + File PE_metrics_idx + File PE_collect_script + String sv_pipeline_docker + RuntimeAttr? runtime_attr_override + } + + RuntimeAttr default_attr = object { + cpu_cores: 1, + mem_gb: 3.75, + disk_gb: ceil(30.0 + size(PE_metric, "GiB") * 3), + boot_disk_gb: 10, + preemptible_tries: 3, + max_retries: 1 + } + + RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) + + command <<< + set -euo pipefail + mkdir PE_metrics/ + gsutil cp ~{PE_metric} ./ + gsutil cp ~{PE_metrics_idx} ./ + echo "Metrics successfully downloaded." + echo "Starting the evidence collection ..." + + if [ $(wc -c < "~{PE_collect_script}") -gt 0 ]; then + bash ~{PE_collect_script} + fi + + touch ~{batch_name}.evidence + touch ~{batch_name}.0.PE_evidences + for peEvFile in *.PE_evidences + do + cat ${peEvFile} >> ~{batch_name}.evidence + done + + bgzip ~{batch_name}.evidence + + >>> + + output { + File evidence = "~{batch_name}.evidence.gz" + } + + runtime { + cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) + memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" + disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" + bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) + docker: sv_pipeline_docker + preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) + maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) + } +} + +task ConcatEvidences { + input { + Array[File] evidences + String prefix + String sv_base_mini_docker + RuntimeAttr?
runtime_attr_override + } + + RuntimeAttr default_attr = object { + cpu_cores: 1, + mem_gb: 3.75, + disk_gb: 10, + boot_disk_gb: 10, + preemptible_tries: 3, + max_retries: 1 + } + + RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) + + command <<< + set -euo pipefail + + while read SPLIT; do + zcat $SPLIT + done < ~{write_lines(evidences)} \ + | bgzip -c \ + > ~{prefix}.evidence.gz + + >>> + + output { + File concat_evidence = "~{prefix}.evidence.gz" + } + + runtime { + cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) + memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" + disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" + bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) + docker: sv_base_mini_docker + preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) + maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) + } +} + +task SplitScripts { + input { + File script + String batch_name + Int n_per_split + String sv_base_mini_docker + RuntimeAttr? runtime_attr_override + } + + RuntimeAttr default_attr = object { + cpu_cores: 1, + mem_gb: 3.75, + disk_gb: 10, + boot_disk_gb: 10, + preemptible_tries: 3, + max_retries: 1 + } + RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) + + output { + Array[File] script_splits = glob("collect_PE_evidences.*") + } + command <<< + + set -e pipefail + + awk '{if ($2~"~{batch_name}") print}' ~{script} > tmp_metrics.sh + + if [ $(wc -c < "tmp_metrics.sh") -gt 0 ]; then + split --additional-suffix ".sh" -l ~{n_per_split} -a 6 tmp_metrics.sh collect_PE_evidences. + else + touch collect_PE_evidences.aaaaaa.sh + fi + + >>> + runtime { + cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) + memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" + disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" + bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) + docker: sv_base_mini_docker + preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) + maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) + } +} diff --git a/wdl/FilterGenotypes.wdl b/wdl/FilterGenotypes.wdl index 0603b5750..8926bc31e 100644 --- a/wdl/FilterGenotypes.wdl +++ b/wdl/FilterGenotypes.wdl @@ -25,6 +25,9 @@ workflow FilterGenotypes { Int optimize_vcf_records_per_shard = 50000 Int filter_vcf_records_per_shard = 20000 + # For SanitizeHeader - bcftools annotate -x ~{header_drop_fields} + String header_drop_fields = "FILTER/LOW_QUALITY,FORMAT/TRUTH_CN_EQUAL,FORMAT/GT_FILTER,FORMAT/CONC_ST,INFO/STATUS,INFO/TRUTH_AC,INFO/TRUTH_AN,INFO/TRUTH_AF,INFO/TRUTH_VID,INFO/CNV_CONCORDANCE,INFO/GENOTYPE_CONCORDANCE,INFO/HET_PPV,INFO/HET_SENSITIVITY,INFO/HOMVAR_PPV,INFO/HOMVAR_SENSITIVITY,INFO/MINSL,INFO/NON_REF_GENOTYPE_CONCORDANCE,INFO/SL_MAX,INFO/SL_MEAN,INFO/VAR_PPV,INFO/VAR_SENSITIVITY,INFO/VAR_SPECIFICITY" + # For MainVcfQc File primary_contigs_fai File? ped_file @@ -32,7 +35,7 @@ workflow FilterGenotypes { Array[Array[String]]? sample_level_comparison_datasets # Array of two-element arrays, one per dataset, each of format [prefix, gs:// path to per-sample tarballs] File? 
sample_renaming_tsv # File with mapping to rename per-sample benchmark sample IDs for compatibility with cohort Boolean run_qc = true - String qc_bcftools_preprocessing_options = "-e 'FILTER~\"UNRESOLVED\" || FILTER~\"HIGH_NCR\"'" + String qc_bcftools_preprocessing_options = "-i 'FILTER=\"PASS\" || FILTER=\"MULTIALLELIC\"'" Int qc_sv_per_shard = 2500 Int qc_samples_per_shard = 600 RuntimeAttr? runtime_override_plot_qc_per_family @@ -120,10 +123,19 @@ workflow FilterGenotypes { sv_base_mini_docker=sv_base_mini_docker } + call SanitizeHeader { + input: + vcf = ConcatVcfs.concat_vcf, + vcf_index = ConcatVcfs.concat_vcf_idx, + drop_fields = header_drop_fields, + prefix = "~{output_prefix_}.filter_genotypes.sanitized", + sv_pipeline_docker = sv_pipeline_docker + } + if (run_qc) { call qc.MainVcfQc { input: - vcfs=[ConcatVcfs.concat_vcf], + vcfs=[SanitizeHeader.out], prefix="~{output_prefix_}.filter_genotypes", ped_file=ped_file, bcftools_preprocessing_options=qc_bcftools_preprocessing_options, @@ -142,8 +154,8 @@ workflow FilterGenotypes { } output { - File filtered_vcf = ConcatVcfs.concat_vcf - File filtered_vcf_index = ConcatVcfs.concat_vcf_idx + File filtered_vcf = SanitizeHeader.out + File filtered_vcf_index = SanitizeHeader.out_index File? main_vcf_qc_tarball = MainVcfQc.sv_vcf_qc_output # For optional analysis @@ -295,3 +307,59 @@ task FilterVcf { maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) } } + +task SanitizeHeader { + input { + File vcf + File vcf_index + String prefix + String drop_fields + String sv_pipeline_docker + RuntimeAttr? runtime_attr_override + } + + RuntimeAttr runtime_default = object { + mem_gb: 3.75, + disk_gb: ceil(10.0 + size(vcf, "GiB") * 2.0), + cpu_cores: 1, + preemptible_tries: 3, + max_retries: 1, + boot_disk_gb: 10 + } + RuntimeAttr runtime_override = select_first([runtime_attr_override, runtime_default]) + runtime { + memory: "~{select_first([runtime_override.mem_gb, runtime_default.mem_gb])} GiB" + disks: "local-disk ~{select_first([runtime_override.disk_gb, runtime_default.disk_gb])} HDD" + cpu: select_first([runtime_override.cpu_cores, runtime_default.cpu_cores]) + preemptible: select_first([runtime_override.preemptible_tries, runtime_default.preemptible_tries]) + maxRetries: select_first([runtime_override.max_retries, runtime_default.max_retries]) + docker: sv_pipeline_docker + bootDiskSizeGb: select_first([runtime_override.boot_disk_gb, runtime_default.boot_disk_gb]) + } + + command <<< + set -euo pipefail + + bcftools view --no-version -h ~{vcf} > header.vcf + + grep -v -e ^"##bcftools" header.vcf \ + -e ^"##source=depth" \ + -e ^"##source=cleanvcf" \ + -e ^"##ALT=/##ALT=/g' > newheader.vcf + + bcftools reheader -h newheader.vcf ~{vcf} \ + | bcftools annotate -x ~{drop_fields} \ + --no-version \ + -O z \ + -o ~{prefix}.vcf.gz + + tabix ~{prefix}.vcf.gz + >>> + + output { + File out = "~{prefix}.vcf.gz" + File out_index = "~{prefix}.vcf.gz.tbi" + } +} diff --git a/wdl/MakeCohortVcf.wdl b/wdl/MakeCohortVcf.wdl index 5ea66ea50..f3891fc8d 100644 --- a/wdl/MakeCohortVcf.wdl +++ b/wdl/MakeCohortVcf.wdl @@ -44,6 +44,8 @@ workflow MakeCohortVcf { File pe_exclude_list File depth_exclude_list File ref_dict + File HERVK_reference + File LINE1_reference Int max_shard_size_resolve Int max_shards_per_chrom_clean_vcf_step1 Int min_records_per_shard_clean_vcf_step1 @@ -378,6 +380,8 @@ workflow MakeCohortVcf { allosome_fai=allosome_fai, chr_x=chr_x, chr_y=chr_y, + HERVK_reference=HERVK_reference, + LINE1_reference=LINE1_reference, 
max_shards_per_chrom_step1=max_shards_per_chrom_clean_vcf_step1, min_records_per_shard_step1=min_records_per_shard_clean_vcf_step1, clean_vcf1b_records_per_shard=clean_vcf1b_records_per_shard, diff --git a/wdl/RefineComplexVariants.wdl b/wdl/RefineComplexVariants.wdl new file mode 100644 index 000000000..4340ef253 --- /dev/null +++ b/wdl/RefineComplexVariants.wdl @@ -0,0 +1,800 @@ +version 1.0 + +import "Structs.wdl" +import "HailMerge.wdl" as hail +import "TasksMakeCohortVcf.wdl" as MiniTasks +import "CollectPEMetricsForCPX.wdl" as collect_pe_metrics_for_cpx +import "CollectLargeCNVSupportForCPX.wdl" as collect_lg_cnv_supp_for_cpx +import "Utils.wdl" as util + +workflow RefineComplexVariants { + input { + File vcf + String prefix + + Array[String] batch_name_list + Array[File] batch_sample_lists + Array[File] PE_metrics + Array[File] PE_metrics_indexes + Array[File] Depth_DEL_beds + Array[File] Depth_DUP_beds + + Int n_per_split + Int min_pe_cpx = 3 + Int min_pe_ctx = 3 + + Boolean use_hail = false + String? gcs_project # required if use_hail = true + + String sv_base_mini_docker + String sv_pipeline_docker + String linux_docker + + RuntimeAttr? runtime_attr_sample_batch + RuntimeAttr? runtime_attr_vcf2bed + RuntimeAttr? runtime_attr_collect_pe + RuntimeAttr? runtime_attr_scatter_vcf + RuntimeAttr? runtime_attr_split_script + RuntimeAttr? runtime_attr_calcu_pe_stat + RuntimeAttr? runtime_attr_split_cpx_ctx + RuntimeAttr? runtime_attr_concat_evidence + RuntimeAttr? runtime_attr_concat + RuntimeAttr? runtime_attr_preconcat + RuntimeAttr? runtime_attr_hail_merge + RuntimeAttr? runtime_attr_fix_header + RuntimeAttr? runtime_attr_generate_cpx_review_script + RuntimeAttr? runtime_attr_generate_cnv_segments_from_cpx + RuntimeAttr? runtime_attr_get_vcf_header_with_members_info_line + RuntimeAttr? runtime_attr_extract_cpx_lg_cnv_by_batch + RuntimeAttr? runtime_attr_seek_depth_supp_for_cpx + RuntimeAttr? runtime_attr_concat_bed_Step1 + RuntimeAttr? runtime_attr_concat_bed_Step2 + RuntimeAttr? 
runtime_attr_calcu_cpx_evidences + } + + call GetSampleBatchPEMap { + input: + batch_name_list = batch_name_list, + batch_sample_lists = batch_sample_lists, + batch_pe_files = write_lines(PE_metrics), + prefix = prefix, + sv_pipeline_docker = sv_pipeline_docker, + runtime_attr_override = runtime_attr_sample_batch + } + + call MiniTasks.ScatterVcf { + input: + vcf = vcf, + prefix = prefix, + records_per_shard = n_per_split, + sv_pipeline_docker = sv_pipeline_docker, + runtime_attr_override = runtime_attr_scatter_vcf + } + + scatter (i in range(length(ScatterVcf.shards))) { + + call util.VcfToBed { + input: + vcf_file = ScatterVcf.shards[i], + args = "-i ALL --include-filters", + variant_interpretation_docker = sv_pipeline_docker, + runtime_attr_override = runtime_attr_vcf2bed + } + + call SplitCpxCtx { + input: + bed = VcfToBed.bed_output, + sv_base_mini_docker = sv_base_mini_docker, + runtime_attr_override = runtime_attr_split_cpx_ctx + } + + call collect_lg_cnv_supp_for_cpx.CollectLargeCNVSupportForCPX { + input: + cpx_ctx_bed = SplitCpxCtx.cpx_ctx_bed, + prefix = "~{prefix}.~{i}", + + sample_batch_pe_map = GetSampleBatchPEMap.sample_batch_pe_map, + batch_name_list = batch_name_list, + Depth_DEL_beds = Depth_DEL_beds, + Depth_DUP_beds = Depth_DUP_beds, + + sv_base_mini_docker = sv_base_mini_docker, + sv_pipeline_docker = sv_pipeline_docker, + + runtime_attr_concat_bed_Step1 = runtime_attr_concat_bed_Step1, + runtime_attr_concat_bed_Step2 = runtime_attr_concat_bed_Step2, + runtime_attr_seek_depth_supp_for_cpx = runtime_attr_seek_depth_supp_for_cpx, + runtime_attr_extract_cpx_lg_cnv_by_batch = runtime_attr_extract_cpx_lg_cnv_by_batch, + runtime_attr_generate_cnv_segments_from_cpx = runtime_attr_generate_cnv_segments_from_cpx + } + + call GenerateCpxReviewScript { + input: + bed = SplitCpxCtx.cpx_ctx_bed, + sample_batch_pe_map = GetSampleBatchPEMap.sample_batch_pe_map, + sv_pipeline_docker = sv_pipeline_docker, + runtime_attr_override = runtime_attr_generate_cpx_review_script + } + + call collect_pe_metrics_for_cpx.CollectPEMetricsForCPX { + input: + batch_name_list = batch_name_list, + PE_metrics = PE_metrics, + PE_metrics_idxes = PE_metrics_indexes, + PE_collect_script = GenerateCpxReviewScript.pe_evidence_collection_script, + prefix = "~{prefix}.~{i}", + n_per_split = n_per_split, + sv_pipeline_docker = sv_pipeline_docker, + sv_base_mini_docker = sv_base_mini_docker, + runtime_attr_collect_pe = runtime_attr_collect_pe, + runtime_attr_split_script = runtime_attr_split_script, + runtime_attr_calcu_pe_stat = runtime_attr_calcu_pe_stat, + runtime_attr_concat_evidence = runtime_attr_concat_evidence + } + + call CalculateCpxEvidences { + input: + PE_collect_script = GenerateCpxReviewScript.pe_evidence_collection_script, + PE_supp = CollectPEMetricsForCPX.evi_stat, + depth_supp = CollectLargeCNVSupportForCPX.lg_cnv_depth_supp, + min_pe_cpx = min_pe_cpx, + prefix = "~{prefix}.~{i}", + sv_pipeline_docker = sv_pipeline_docker, + runtime_attr_override = runtime_attr_calcu_cpx_evidences + } + + call CalculateCtxEvidences{ + input: + PE_collect_script = GenerateCpxReviewScript.pe_evidence_collection_script, + PE_supp = CollectPEMetricsForCPX.evi_stat, + min_pe_ctx = min_pe_ctx, + prefix = "~{prefix}.~{i}", + sv_pipeline_docker = sv_pipeline_docker, + runtime_attr_override = runtime_attr_calcu_cpx_evidences + } + + call ReviseVcf { + input: + vcf_file = ScatterVcf.shards[i], + CPX_manual = CalculateCpxEvidences.manual_revise_CPX_results, + CTX_manual = CalculateCtxEvidences.manual_revise_CTX_results, + 
unresolved_svids = GenerateCpxReviewScript.unresolved_svids, + prefix = "~{prefix}.~{i}", + sv_pipeline_docker = sv_pipeline_docker + } + } + + if (use_hail) { + call hail.HailMerge { + input: + vcfs=ReviseVcf.revised_vcf, + prefix="~{prefix}.cpx_refined", + gcs_project=gcs_project, + sv_base_mini_docker=sv_base_mini_docker, + sv_pipeline_docker=sv_pipeline_docker, + runtime_override_preconcat=runtime_attr_preconcat, + runtime_override_hail_merge=runtime_attr_hail_merge, + runtime_override_fix_header=runtime_attr_fix_header + } + } + + if (!use_hail) { + call MiniTasks.ConcatVcfs { + input: + vcfs=ReviseVcf.revised_vcf, + vcfs_idx=ReviseVcf.revised_vcf_idx, + allow_overlaps=true, + outfile_prefix="~{prefix}.cpx_refined", + sv_base_mini_docker=sv_base_mini_docker, + runtime_attr_override=runtime_attr_concat + } + } + + call MiniTasks.ConcatHeaderedTextFiles { + input: + text_files = CalculateCpxEvidences.manual_revise_CPX_results, + output_filename = "~{prefix}.CPX_evidence.txt" , + linux_docker = linux_docker + + } + + output { + File cpx_refined_vcf = select_first([ConcatVcfs.concat_vcf, HailMerge.merged_vcf]) + File cpx_refined_vcf_index = select_first([ConcatVcfs.concat_vcf_idx, HailMerge.merged_vcf_index]) + File cpx_evidences = ConcatHeaderedTextFiles.out + } +} + + +task GetSampleBatchPEMap { + input { + Array[File] batch_sample_lists + Array[String] batch_name_list + File batch_pe_files + String prefix + String sv_pipeline_docker + RuntimeAttr? runtime_attr_override + } + + RuntimeAttr default_attr = object { + cpu_cores: 1, + mem_gb: 3.75, + disk_gb: 10 + 2*ceil(size(flatten([batch_sample_lists]), "GB")), + boot_disk_gb: 10, + preemptible_tries: 1, + max_retries: 1 + } + + RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) + + command <<< + set -euo pipefail + + python <>> + + output { + File sample_batch_pe_map = "~{prefix}.sample_batch_pe_map.tsv" + } + + runtime { + cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) + memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" + disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" + bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) + docker: sv_pipeline_docker + preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) + maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) + } +} + + +task ReviseVcf { + input { + File vcf_file + File CPX_manual + File CTX_manual + File unresolved_svids + String prefix + String sv_pipeline_docker + RuntimeAttr? 
runtime_attr_override + } + + RuntimeAttr default_attr = object { + cpu_cores: 1, + mem_gb: 3.75, + disk_gb: ceil(5.0 + size(vcf_file, "GB")*3), + boot_disk_gb: 10, + preemptible_tries: 1, + max_retries: 1 + } + RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) + + command<<< + set -euo pipefail + + python <',) + del_section = record.stop - record.pos + if del_section<50: + record.info['CPX_TYPE']="dDUP" + record.info['CPX_INTERVALS'] = record.info['SOURCE'].replace('INV','DUP')+','+record.info['SOURCE'] + else: + record.info['CPX_TYPE']="dDUP_iDEL" + record.info['CPX_INTERVALS'] = record.info['SOURCE'].replace('INV','DUP')+','+record.info['SOURCE']+','+"DEL_"+record.chrom+":"+str(record.pos)+'-'+str(record.stop) + del record.info['SOURCE'] + fo.write(record) # write out every record that was in the input - NCR will remove ones with no carriers left + fin.close() + fo.close() + +import pysam + +unresolved_svids = unresolved_readin("~{unresolved_svids}") +hash_CPX_manual = CPX_manual_readin("~{CPX_manual}") +hash_CTX_manual = CTX_manual_readin("~{CTX_manual}") +print(len(hash_CPX_manual.keys())) +revise_vcf("~{vcf_file}", "~{prefix}.Manual_Revised.vcf.gz", hash_CPX_manual, unresolved_svids, hash_CTX_manual) + +CODE + + tabix ~{prefix}.Manual_Revised.vcf.gz + >>> + + output { + File revised_vcf = "~{prefix}.Manual_Revised.vcf.gz" + File revised_vcf_idx = "~{prefix}.Manual_Revised.vcf.gz.tbi" + } + + runtime { + cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) + memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" + disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" + bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) + docker: sv_pipeline_docker + preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) + maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) + } +} + +task SplitCpxCtx { + input { + File bed + String sv_base_mini_docker + RuntimeAttr? 
runtime_attr_override + } + + RuntimeAttr default_attr = object { + cpu_cores: 1, + mem_gb: 3.75, + disk_gb: 10, + boot_disk_gb: 10, + preemptible_tries: 1, + max_retries: 1 + } + RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) + + + String prefix = basename(bed, ".bed.gz") + + command <<< + set -eu + + zcat ~{bed} | head -1 > ~{prefix}.cpx_ctx.bed + + filterColumn=$(zcat ~{bed} | head -1 | tr "\t" "\n" | awk '$1=="FILTER" {print NR}') + + set -o pipefail + + zcat ~{bed} | awk 'NR > 1' | { grep CPX || true; } | awk -v filter_column=${filterColumn} '$filter_column !~ /UNRESOLVED/' >> ~{prefix}.cpx_ctx.bed + + zcat ~{bed} | awk 'NR > 1' | { grep CTX || true; } >> ~{prefix}.cpx_ctx.bed + + # INS with INV in SOURCE - will be converted to CPX later so need to evaluate evidence + zcat ~{bed} | awk 'NR > 1' | { grep INS || true; } | { grep INV || true; } >> ~{prefix}.cpx_ctx.bed + + bgzip ~{prefix}.cpx_ctx.bed + >>> + + output { + File cpx_ctx_bed = "~{prefix}.cpx_ctx.bed.gz" + } + + runtime { + cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) + memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" + disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" + bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) + docker: sv_base_mini_docker + preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) + maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) + } +} + +task GenerateCpxReviewScript { + input { + File bed + File sample_batch_pe_map + File? script + String sv_pipeline_docker + RuntimeAttr? runtime_attr_override + } + + RuntimeAttr default_attr = object { + cpu_cores: 1, + mem_gb: 3.75, + disk_gb: 10, + boot_disk_gb: 10, + preemptible_tries: 1, + max_retries: 1 + } + RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) + + + String prefix = basename(bed, ".bed.gz") + + command <<< + set -euo pipefail + + cut -f1,3 ~{sample_batch_pe_map} > sample_PE_metrics.tsv + + python ~{default="/opt/sv-pipeline/scripts/reformat_CPX_bed_and_generate_script.py" script} \ + -i ~{bed} \ + -s sample_PE_metrics.tsv \ + -p CPX_CTX_disINS.PASS.PE_evidences \ + -c collect_PE_evidences.CPX_CTX_disINS.PASS.sh \ + -r ~{prefix}.svelter \ + -u ~{prefix}.unresolved_svids.txt + + >>> + + output { + File pe_evidence_collection_script = "collect_PE_evidences.CPX_CTX_disINS.PASS.sh" + File svelter = "~{prefix}.svelter" + File unresolved_svids = "~{prefix}.unresolved_svids.txt" + } + + runtime { + cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) + memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" + disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" + bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) + docker: sv_pipeline_docker + preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) + maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) + } +} + +task CalculateCpxEvidences { + input { + File PE_collect_script + File PE_supp + File depth_supp + Int min_pe_cpx + String prefix + String sv_pipeline_docker + RuntimeAttr? 
runtime_attr_override + } + + RuntimeAttr default_attr = object { + cpu_cores: 1, + mem_gb: 3.75, + disk_gb: 10, + boot_disk_gb: 10, + preemptible_tries: 1, + max_retries: 1 + } + RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) + + command <<< + + set -eu + awk '{print $6, $(NF-3)}' ~{PE_collect_script} | grep -v "_CTX_"| uniq > sample_SVID.tsv + + set -euo pipefail + + + python <0 or PE_supp[svid][sample][1][0]>0: + pe_supp = 'partial_PE' + if PE_supp[svid][sample][0][0]>0 and PE_supp[svid][sample][1][0]>0: + pe_supp = 'low_PE' + if PE_supp[svid][sample][0][0]>=~{min_pe_cpx} and PE_supp[svid][sample][1][0]>=~{min_pe_cpx}: + pe_supp = 'high_PE' + out[svid][sample] = [pe_supp] + else: + out[svid][sample] = ['no_PE'] + else: + for sample in sample_svid[svid]: + out[svid][sample] = ['no_PE'] + return out + + def add_depth_supp(sample_svid_pe, Depth_supp): + for svid in sample_svid_pe.keys(): + if svid in Depth_supp.keys(): + for sample in sample_svid_pe[svid].keys(): + if sample in Depth_supp[svid].keys(): + depth_supp = 'lack_depth' + if Depth_supp[svid][sample][0]>.5: + depth_supp = 'depth' + if len(Depth_supp[svid][sample])>1: + if not Depth_supp[svid][sample][1]>.5: + depth_supp+=',lack_depth' + else: + if len(Depth_supp[svid][sample])>1: + if Depth_supp[svid][sample][1]>.5: + depth_supp+=',depth' + sample_svid_pe[svid][sample].append(depth_supp) + else: + sample_svid_pe[svid][sample].append('NA') + else: + for sample in sample_svid_pe[svid].keys(): + sample_svid_pe[svid][sample].append('NA') + return sample_svid_pe + + def write_pe_depth_supp(sample_svid_pe_depth, fileout): + fo=open(fileout,'w') + print('\t'.join(['VID','sample','supportive_PE_counts','depth_supp']), file=fo) + for svid in sample_svid_pe_depth.keys(): + for sample in sample_svid_pe_depth[svid].keys(): + print('\t'.join([svid,sample] + sample_svid_pe_depth[svid][sample]), file=fo) + fo.close() + + import os + sample_svid = sample_svid_readin("sample_SVID.tsv") + PE_supp = PE_supp_readin("~{PE_supp}") + Depth_supp = Depth_supp_readin("~{depth_supp}") + sample_svid_pe = add_PE_supp(sample_svid, PE_supp) + sample_svid_pe_depth = add_depth_supp(sample_svid_pe, Depth_supp) + write_pe_depth_supp(sample_svid_pe_depth, "~{prefix}.manual_revise.CPX_results") + + CODE + >>> + + output { + File manual_revise_CPX_results = "~{prefix}.manual_revise.CPX_results" + } + + runtime { + cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) + memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" + disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" + bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) + docker: sv_pipeline_docker + preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) + maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) + } +} + +task CalculateCtxEvidences { + input { + File PE_collect_script + File PE_supp + Int min_pe_ctx + String prefix + String sv_pipeline_docker + RuntimeAttr? 
runtime_attr_override + } + + RuntimeAttr default_attr = object { + cpu_cores: 1, + mem_gb: 3.75, + disk_gb: 10, + boot_disk_gb: 10, + preemptible_tries: 1, + max_retries: 1 + } + RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) + + command<<< + + set -eu + + awk '{print $6, $(NF-3)}' ~{PE_collect_script} | grep "_CTX_"| uniq > sample_SVID.tsv + + set -eu + + zcat ~{PE_supp} | grep "_CTX_"| uniq > PE_supp.tsv + + set -euo pipefail + + python <0 or PE_supp[svid][sample][1][0]>0: + pe_supp = 'partial_PE' + if PE_supp[svid][sample][0][0]>0 and PE_supp[svid][sample][1][0]>0: + pe_supp = 'low_PE' + if PE_supp[svid][sample][0][0]>=~{min_pe_ctx} and PE_supp[svid][sample][1][0]>=~{min_pe_ctx}: + pe_supp = 'high_PE' + out[svid][sample] = [pe_supp] + else: + out[svid][sample] = ['no_PE'] + else: + for sample in sample_svid[svid]: + out[svid][sample] = ['no_PE'] + return out + + def write_pe_depth_supp(sample_svid_pe_depth, fileout): + fo=open(fileout,'w') + print('\t'.join(['VID','sample','supportive_PE_counts']), file=fo) + for svid in sample_svid_pe_depth.keys(): + for sample in sample_svid_pe_depth[svid].keys(): + print('\t'.join([svid,sample] + sample_svid_pe_depth[svid][sample]), file=fo) + fo.close() + + import os + sample_svid = sample_svid_readin("sample_SVID.tsv") + PE_supp = PE_supp_readin("PE_supp.tsv") + sample_svid_pe = add_PE_supp(sample_svid, PE_supp) + write_pe_depth_supp(sample_svid_pe, "~{prefix}.manual_revise.CTX_results") + + CODE + >>> + + output { + File manual_revise_CTX_results = "~{prefix}.manual_revise.CTX_results" + } + + runtime { + cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) + memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" + disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" + bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) + docker: sv_pipeline_docker + preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) + maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) + } +} +
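For reference, the PE-support labels written into the manual_revise.CPX_results and manual_revise.CTX_results tables above follow a simple threshold cascade over the two junction read-pair counts collected per variant and sample, with min_pe_cpx and min_pe_ctx both defaulting to 3. A minimal sketch of that classification, using hypothetical names (junction1_pe, junction2_pe) for the two per-junction counts rather than the pipeline's own parsing code:

# Minimal sketch (not pipeline code) of the PE-support cascade in
# CalculateCpxEvidences / CalculateCtxEvidences; junction1_pe and junction2_pe
# are hypothetical names for the supporting read-pair counts at the two
# breakpoint junctions of one variant-sample pair.
def classify_pe_support(junction1_pe, junction2_pe, min_pe=3):
    if junction1_pe <= 0 and junction2_pe <= 0:
        return 'no_PE'        # no supporting read pairs at either junction
    if junction1_pe > 0 and junction2_pe > 0:
        if junction1_pe >= min_pe and junction2_pe >= min_pe:
            return 'high_PE'  # both junctions meet min_pe_cpx / min_pe_ctx
        return 'low_PE'       # both junctions supported, but below the threshold
    return 'partial_PE'       # only one junction has supporting read pairs

# e.g. with the default threshold of 3:
#   classify_pe_support(4, 5)  -> 'high_PE'
#   classify_pe_support(1, 2)  -> 'low_PE'
#   classify_pe_support(0, 6)  -> 'partial_PE'

ReviseVcf then consumes these tables (the CPX table additionally carries a depth-support column derived from CollectLargeCNVSupportForCPX) when revising each VCF shard.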