From aef6ac98d746d7cb1272586f25d528fddf90ed5b Mon Sep 17 00:00:00 2001 From: epiercehoffman Date: Thu, 31 Oct 2024 10:32:09 -0400 Subject: [PATCH 1/2] Filter wham-only DELs and scramble-only SVAs in CleanVcf & docs updates (#740) --- .github/.dockstore.yml | 9 + README.md | 2 +- .../cohort_mode_workspace_dashboard.md.tmpl | 218 +----------------- .../cohort_mode/samples_1kgp_156.tsv.tmpl | 157 +++++++++++++ ...kgp.tsv.tmpl => samples_1kgp_312.tsv.tmpl} | 0 .../VisualizeCnvs.json.tmpl | 10 + .../VisualizeCnvs/VisualizeCnvs.json.tmpl | 2 +- scripts/test/terra_validation.py | 2 +- wdl/CleanVcfChromosome.wdl | 63 ++++- website/.gitignore | 1 + website/docs/advanced/cromwell/overview.md | 2 +- website/docs/best_practices.md | 4 +- website/docs/execution/joint.md | 131 +++++------ website/docs/execution/single.md | 4 +- website/docs/gs/calling_modes.md | 3 +- website/docs/intro.md | 8 +- website/docs/modules/annotate_vcf.md | 2 +- website/docs/modules/apply_manual_filter.md | 57 ----- website/docs/modules/concordance.md | 8 +- website/docs/modules/evidence_qc.md | 36 ++- website/docs/modules/filter_batch.md | 14 +- website/docs/modules/index.md | 2 +- website/docs/modules/main_vcf_qc.md | 6 +- website/docs/modules/refine_cpx.md | 6 +- 24 files changed, 357 insertions(+), 390 deletions(-) create mode 100644 inputs/templates/terra_workspaces/cohort_mode/samples_1kgp_156.tsv.tmpl rename inputs/templates/terra_workspaces/cohort_mode/{samples_1kgp.tsv.tmpl => samples_1kgp_312.tsv.tmpl} (100%) create mode 100644 inputs/templates/terra_workspaces/cohort_mode/workflow_configurations/VisualizeCnvs.json.tmpl delete mode 100644 website/docs/modules/apply_manual_filter.md diff --git a/.github/.dockstore.yml b/.github/.dockstore.yml index 974fdeee6..259be5b57 100644 --- a/.github/.dockstore.yml +++ b/.github/.dockstore.yml @@ -198,6 +198,15 @@ workflows: tags: - /.*/ + - subclass: WDL + name: VisualizeCnvs + primaryDescriptorPath: /wdl/VisualizeCnvs.wdl + filters: + branches: + - main + tags: + - /.*/ + - subclass: WDL name: SingleSamplePipeline primaryDescriptorPath: /wdl/GATKSVPipelineSingleSample.wdl diff --git a/README.md b/README.md index 09146ef12..a792114ce 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ A structural variation discovery pipeline for Illumina short-read whole-genome sequencing (WGS) data. -For technical documentation on GATK-SV, including how to run the pipeline, please refer to our website. +For technical documentation on GATK-SV, including how to run the pipeline, please refer to our [website](https://broadinstitute.github.io/gatk-sv/). ## Repository structure * `/carrot`: [Carrot](https://github.com/broadinstitute/carrot) tests diff --git a/inputs/templates/terra_workspaces/cohort_mode/cohort_mode_workspace_dashboard.md.tmpl b/inputs/templates/terra_workspaces/cohort_mode/cohort_mode_workspace_dashboard.md.tmpl index c22618380..e9ae6b155 100644 --- a/inputs/templates/terra_workspaces/cohort_mode/cohort_mode_workspace_dashboard.md.tmpl +++ b/inputs/templates/terra_workspaces/cohort_mode/cohort_mode_workspace_dashboard.md.tmpl @@ -1,216 +1,6 @@ # GATK-SV -GATK-SV is a structural variation discovery pipeline for Illumina short-read whole-genome sequencing (WGS) data. +GATK-SV is a structural variation discovery pipeline for Illumina short-read whole-genome sequencing (WGS) data. +This workspace contains the GATK-SV joint calling pipeline; for the single-sample pipeline refer to [this workspace](https://app.terra.bio/#workspaces/help-gatk/GATK-Structural-Variants-Single-Sample). 
-Before you begin processing, please read the full pipeline documentation in the README in the [GATK-SV GitHub repository](https://github.com/broadinstitute/gatk-sv). This dashboard will focus on additional information specific to Terra and cannot substitute for the full documentation. - -## Data -The sample data in this workspace is 312 publicly-available 1000 Genomes Project samples from the [NYGC/AnVIL high coverage data set](https://app.terra.bio/#workspaces/anvil-datastorage/1000G-high-coverage-2019), divided into two equally-sized batches. - -## Pipeline Expectations -### What does it do? -This pipeline performs structural variation discovery from CRAMs, joint genotyping, and variant resolution on a cohort of samples. - -### Required inputs -The following inputs must be provided for each sample in the cohort, via the sample table described in **Workspace Setup** step 2: - -|Input Type|Input Name|Description| -|---------|--------|--------------| -|`String`|`sample_id`|Case sample identifier*| -|`File`|`bam_or_cram_file`|Path to the GCS location of the input CRAM or BAM file. If using BAM files, an index `.bam.bai` file must either be present in the same directory, or the path must be provided with the input `bam_or_cram_index`. If using CRAM files, an index `.cram.crai` file must either be present in the same directory, or the path must be provided with the input `bam_or_cram_index`.| - -*See **Sample ID requirements** below for specifications. - -The following cohort-level or batch-level inputs are also required: - -|Input Type|Input Name|Description| -|---------|--------|--------------| -|`String`|`sample_set_id`|Batch identifier| -|`String`|`sample_set_set_id`|Cohort identifier| -|`File`|`cohort_ped_file`|Path to the GCS location of a family structure definitions file in [PED format](https://github.com/broadinstitute/gatk-sv#ped-format).| - -### Pipeline outputs - -The following are the main pipeline outputs. For more information on the outputs of each module, refer to the [README](https://github.com/broadinstitute/gatk-sv). - -|Output Type|Output Name|Description| -|---------|--------|--------------| -|`File`|`annotated_vcf`|Annotated SV VCF for the cohort***| -|`File`|`annotated_vcf_idx`|Index for `annotated_vcf`| -|`File`|`sv_vcf_qc_output`|QC plots (bundled in a .tar.gz file)| - -***Note that this VCF is not filtered - -### Pipeline overview - -pipeline_diagram - -The following workflows are included in this workspace, to be executed in this order: - -1. `01-GatherSampleEvidence`: Per-sample SV evidence collection, including calls from a configurable set of algorithms (Manta, MELT, and Wham), read depth (RD), split read positions (SR), and discordant pair positions (PE). -2. `02-EvidenceQC`: Dosage bias scoring and ploidy estimation, run on preliminary batches -3. `03-TrainGCNV`: Per-batch training of a gCNV model for use in `04-GatherBatchEvidence` -4. `04-GatherBatchEvidence`: Per-batch copy number variant calling using cn.MOPS and GATK gCNV; B-allele frequency (BAF) generation; call and evidence aggregation -5. `05-ClusterBatch`: Per-batch variant clustering -6. `06-GenerateBatchMetrics`: Per-batch variant filtering, metric generation -7. `07-FilterBatchSites`: Per-batch variant filtering and plot SV counts per sample per SV type to enable choice of IQR cutoff for outlier filtration in `08-FilterBatchSamples` -8. `08-FilterBatchSamples`: Per-batch outlier sample filtration -9. 
`09-MergeBatchSites`: Site merging of SVs discovered across batches, run on a cohort-level `sample_set_set` -10. `10-GenotypeBatch`: Per-batch genotyping of all sites in the cohort -11. `11-RegenotypeCNVs`: Cohort-level genotype refinement of some depth calls -12. `12-CombineBatches`: Cohort-level cross-batch integration and clustering -13. `13-ResolveComplexVariants`: Complex variant resolution -14. `14-GenotypeComplexVariants`: Complex variant re-genotyping -15. `15-CleanVcf`: VCF cleanup -16. `16-RefineComplexVariants`: Complex variant and translocation refinement -17. `17-JoinRawCalls`: Combines unfiltered calls (from step 5) across batches -18. `18-SVConcordance`: Annotates variants with genotype concordance against raw calls -19. `19-FilterGenotypes`: Performs genotype filtering to improve precision and generates QC plots -20. `20-AnnotateVcf`: Cohort VCF annotations, including functional annotation, allele frequency (AF) annotation, and AF annotation with external population callsets - -Additional downstream modules, such as those for visualization, are under development. They are not included in this workspace at this time, but the source code can be found in the [GATK-SV GitHub repository](https://github.com/broadinstitute/gatk-sv). See **Downstream steps** towards the bottom of this page for more information. - -Extra workflows (Not part of canonical pipeline, but included for your convenience. May require manual configuration): -* `MainVcfQc`: Generates VCF QC reports (is run during 18-FilterGenotypes by default) -* `PlotSVCountsPerSample`: Plot SV counts per sample per SV type -* `FilterOutlierSamples`: Filter outlier samples (in terms of SV counts) from a single VCF. Recommended to run `PlotSVCountsPerSample` beforehand (configured with the single VCF you want to filter) to enable IQR cutoff choice. - -For detailed instructions on running the pipeline in Terra, see **Step-by-step instructions** below. - -### How many samples can I process at once? - -#### Single-sample vs. cohort mode - -There are two modes for this pipeline according to the number of samples you need to process: - -1. Single-sample mode (<100 samples): The cohort mode of this pipeline requires at least 100 samples, so for smaller sets of samples we recommend the single-sample version of this pipeline, which is available as a [featured Terra workspace](https://app.terra.bio/#workspaces/help-gatk/GATK-Structural-Variants-Single-Sample). -2. Cohort mode (>=100 samples): Batches should be 100-500 samples, so you may choose to divide your cohort into multiple batches if you have at least 200 samples. Refer to the [Batching](https://github.com/broadinstitute/gatk-sv#batching) section of the README for further information. - - -#### What is the maximum number of samples the pipeline can handle? - -In Terra, we have tested batch sizes of up to 500 samples and cohort sizes of up to 11,000 samples (and 98,000 samples with the final steps split by chromosome). On a separate cromwell server, we have tested the pipeline on cohorts of up to ~140,000 samples. - - -### Time and cost estimates - -The following estimates pertain to the 1000 Genomes sample data in this workspace. They represent aggregated run time and cost across modules for the whole pipeline. For workflows run multiple times (on each sample or on each batch), the longest individual runtime was used. Call caching may affect some of this information. 
- -|Number of samples|Time|Total run cost|Per-sample run cost| -|--------------|--------|----------|----------| -|312|~76 hours|~$675|~$2.16/sample| - -Please note that sample characteristics, cohort size, and level of filtering may influence pipeline compute costs, with average costs ranging between $2-$3 per sample. For instance, PCR+ samples and samples with a high percentage of improperly paired reads have been observed to cost more. Consider [excluding low-quality samples](https://github.com/broadinstitute/gatk-sv#sample-exclusion) prior to processing to keep costs low. - -## Running GATK-SV on your data in Terra -This section will cover how to run the pipeline on your own data in Terra. - -### Sample ID requirements - -Refer to [the Sample ID Requirements section of the README](https://github.com/broadinstitute/gatk-sv#sample-id-requirements) for sample ID requirements for the pipeline. IDs that do not meet these requirements may cause errors. - -The same requirements apply to family IDs in the PED file, batch IDs (`sample_set_id`), and the cohort ID (`sample_set_set_id`). - -Sample IDs are provided to `01-GatherSampleEvidence` directly and need not match sample names from the BAM/CRAM headers. We recommend transforming sample IDs using [this script](https://github.com/talkowski-lab/gnomad_sv_v3/blob/master/sample_id/convert_sample_ids.py) prior to uploading your sample data table. (Currently, sample IDs can be replaced again in `04-GatherBatchEvidence`.) The following files will need to be updated with the transformed sample IDs: -* Sample data table (for Terra) -* PED file -* Sample set membership file (for Terra) - - -### Workspace setup - -1. Clone this workspace into a Terra project to which you have access - -2. In your new workspace, delete the example data. To do this, go to the *Data* tab of the workspace. Delete the data tables in this order: `sample_set_set`, `sample_set`, and `sample`. For each table, click the 3 dots icon to the right of the table name and click "Delete table". Confirm when prompted. -deleting data tables - -3. Create and upload a new sample data table for your samples. This should be a tab-separated file (.tsv) with one line per sample, as well as a header (first) line. It should contain the columns `entity:sample_id` (first column) and `bam_or_cram_file` at minimum. See the **Required inputs** section above for more information on these inputs. For an example sample data table, refer to the sample data table for the 1000 Genomes samples in this workspace [here in the GATK-SV GitHub repository](https://github.com/broadinstitute/gatk-sv/blob/master/input_templates/terra_workspaces/cohort_mode/samples_1kgp.tsv.tmpl). To upload the TSV file, navigate to the *Data* tab of the workspace, click the `Import Data` button on the top left, and select "Upload TSV". -uploading a TSV data table - -4. Edit the `cohort_ped_file` item in the Workspace Data table (as shown in the screenshot below) to provide the Google URI to the PED file for your cohort (make sure to share it with your Terra proxy account!). -editing cohort_ped_file - - -#### Creating sample_sets - -To create batches (in the `sample_set` table), the easiest way is to upload a tab-separated sample set membership file. This file should have one line per sample, plus a header (first) line. The first column should be `membership:sample_set_id` (containing the `sample_set_id` for the sample in question), and the second should be `sample` (containing the sample IDs). 
Recall that batch IDs (`sample_set_id`) should follow the **Sample ID requirements** laid out above. For an example sample membership file, refer to the one for the 1000 Genomes samples in this workspace [here in the GATK-SV GitHub repository](https://github.com/broadinstitute/gatk-sv/blob/master/input_templates/terra_workspaces/cohort_mode/sample_set_membership_1kgp.tsv.tmpl). - - -### Workflow instructions - -#### General recommendations - -* It is recommended to run each workflow first on one sample/batch to check that the method is properly configured before you attempt to process all of your data. -* We recommend enabling call-caching (on by default in each workflow configuration). -* We recommend enabling automatic intermediate file deletion by checking the box labeled "Delete intermediate outputs" at the top of the workflow launch page every time you start a workflow. With this option enabled, intermediate files (those not present in the Terra data table, and not needed for any further GATK-SV processing) will be deleted automatically if the workflow succeeds. If the workflow fails, the outputs will be retained to enable a re-run to pick up where it left off with call-caching. However, call-caching will not be possible for workflows that have succeeded. For more information on this option, see [this article](https://terra.bio/delete-intermediates-option-now-available-for-workflows-in-terra/). For guidance on managing intermediate storage from failed workflows, or from workflows without the delete intermediates option enabled, see the next bullet point. -* There are cases when you may need to manage storage in other ways: for workflows that failed (only delete files from a failed workflow after a version has succeeded, to avoid disabling call-caching), for workflows without intermediate file deletion enabled, or once you are done processing and want to delete files from earlier steps in the pipeline that you no longer need. - * One option is to manually delete large files, or directories containing failed workflow intermediates (after re-running the workflow successfully to take advantage of call-caching) with the command `gsutil -m rm gs://path/to/workflow/directory/**file_extension_to_delete` to delete all files with the given extension for that workflow, or `gsutil -m rm -r gs://path/to/workflow/directory/` to delete an entire workflow directory (only after you are done with all the files!). Note that this can take a very long time for larger workflows, which may contain thousands of files. - * Another option is to use the `fiss mop` API call to delete all files that do not appear in one of the Terra data tables (intermediate files). Always ensure that you are completely done with a step and you will not need to return before using this option, as it will break call-caching. See [this blog post](https://terra.bio/deleting-intermediate-workflow-outputs/) for more details. This can also be done [via the command line](https://github.com/broadinstitute/fiss/wiki/MOP:-reducing-your-cloud-storage-footprint). -* If your workflow fails, check the job manager for the error message. Most issues can be resolved by increasing the memory or disk. Do not delete workflow log files until you are done troubleshooting. If call-caching is enabled, do not delete any files from the failed workflow until you have run it successfully. 
-* To display run costs, see [this article](https://support.terra.bio/hc/en-us/articles/360037862771#h_01EX5ED53HAZ59M29DRCG24CXY) for one-time setup instructions for non-Broad users. - -#### 01-GatherSampleEvidence - -Read the full GatherSampleEvidence documentation [here](https://github.com/broadinstitute/gatk-sv#gather-sample-evidence). -* This workflow runs on a per-sample level, but you can launch many (a few hundred) samples at once, in arbitrary partitions. Make sure to try just one sample first though! -* Refer to the [Data section of the README](https://github.com/broadinstitute/gatk-sv#data) for details on input files, sample exclusion, and sample IDs. -* It is normal for a few samples in a cohort to run out of memory during Wham SV calling, so we recommend enabling auto-retry for out-of-memory errors for `01-GatherSampleEvidence` only. Before you launch the workflow, click the checkbox reading "Retry with more memory" and set the memory retry factor to 1.8. This action must be performed each time you launch a `01-GatherSampleEvidence` job. -* Please note that most large published joint call sets produced by GATK-SV, including gnomAD-SV, included the tool MELT, a state-of-the-art mobile element insertion (MEI) detector, as part of the pipeline. Due to licensing restrictions, we cannot provide a public docker image for this algorithm. The `01-GatherSampleEvidence` workflow does not use MELT as one of the SV callers by default, which will result in less sensitivity to MEI calls. In order to use MELT, you will need to build your own private docker image (example Dockerfile [here](https://github.com/broadinstitute/gatk-sv/blob/master/dockerfiles/melt/Dockerfile)), share it with your Terra proxy account, enter it in the `melt_docker` input in the `01-GatherSampleEvidence` configuration (as a string, surrounded by double-quotes), and then click "Save". No further changes are necessary beyond `01-GatherSampleEvidence`. - * Note that the version of MELT tested with GATK-SV is v2.0.5. If you use a different version to create your own docker image, we recommend testing your image by running one pilot sample through `01-GatherSampleEvidence` to check that it runs as expected, then running a small group of about 10 pilot samples through the pipeline until the end of `04-GatherBatchEvidence` to check that the outputs are compatible with GATK-SV. -* If you enable "Delete intermediate outputs" whenever you launch this workflow (recommended), BAM files will be deleted for successful runs; but BAM files will not be deleted if the run fails or if intermediate file deletion is not enabled. Since BAM files are large, we recommend deleting them to save on storage costs, but only after fixing and re-running the failed workflow, so that it will call-cache. - - -#### 02-EvidenceQC - -Read the full EvidenceQC documentation [here](https://github.com/broadinstitute/gatk-sv#evidence-qc). -* `02-EvidenceQC` is run on arbitrary cohort partitions of up to 500 samples. -* The outputs from `02-EvidenceQC` can be used for [preliminary sample QC](https://github.com/broadinstitute/gatk-sv#preliminary-sample-qc) and [batching](https://github.com/broadinstitute/gatk-sv#batching) before moving on to TrainGCNV. - - -#### 03-TrainGCNV - -Read the full TrainGCNV documentation [here](https://github.com/broadinstitute/gatk-sv#gcnv-training-1). 
-* Before running this workflow, create the batches (~100-500 samples) you will use for the rest of the pipeline based on sample coverage, WGD score (from `02-EvidenceQC`), and PCR status. These will likely not be the same as the batches you used for `02-EvidenceQC`. -* By default, `03-TrainGCNV` is configured to be run once per `sample_set` on 100 randomly-chosen samples from that set to create a gCNV model for each batch. To modify this behavior, you can set the `n_samples_subsample` parameter to the number of samples to use for training. - -#### 04-GatherBatchEvidence - -Read the full GatherBatchEvidence documentation [here](https://github.com/broadinstitute/gatk-sv#gather-batch-evidence). -* Use the same `sample_set` definitions you used for `03-TrainGCNV`. -* Before running this workflow, ensure that you have updated the `cohort_ped_file` attribute in Workspace Data with your cohort's PED file, with sex assignments updated based on ploidy detection from `02-EvidenceQC`. - -#### 05-ClusterBatch and 06-GenerateBatchMetrics - -Read the full documentation for these modules [here](https://github.com/broadinstitute/gatk-sv#cluster-batch). -* Use the same `sample_set` definitions you used for `03-TrainGCNV` and `04-GatherBatchEvidence`. - - -#### 07-FilterBatchSites, 08-FilterBatchSamples - -These two workflows make up FilterBatch; they are subdivided in this workspace to enable tuning of outlier filtration cutoffs. Read the full FilterBatch documentation [here](https://github.com/broadinstitute/gatk-sv#filter-batch). -* Use the same `sample_set` definitions you used for `03-TrainGCNV` through `06-GenerateBatchMetrics`. -* `07-FilterBatchSites` produces SV count plots and files, as well as a preview of the outlier samples to be filtered. The input `N_IQR_cutoff_plotting` is used to visualize filtration thresholds on the SV count plots and preview the samples to be filtered; the default value is set to 6. You can adjust this value depending on your needs, and you can re-run the workflow with new `N_IQR_cutoff_plotting` values until the plots and outlier sample lists suit the purposes of your study. Once you have chosen an IQR cutoff, provide it to the `N_IQR_cutoff` input in `08-FilterBatchSamples` to filter the VCFs using the chosen cutoff. -* `08-FilterBatchSamples` performs outlier sample filtration, removing samples with an abnormal number of SV calls of at least one SV type. To tune the filtering threshold to your needs, edit the `N_IQR_cutoff` input value based on the plots and outlier sample preview lists from `07-FilterBatchSites`. The default value for `N_IQR_cutoff` in this step is 10000, which essentially means that no samples are filtered. - -#### 09-MergeBatchSites - -Read the full MergeBatchSites documentation [here](https://github.com/broadinstitute/gatk-sv#merge-batch-sites). -* `09-MergeBatchSites` is a cohort-level workflow, so it is run on a `sample_set_set` containing all of the batches in the cohort. Navigate to the Data tab of your workspace. If there is no `sample_set_set` data table, you will need to create it. To do this, select the `sample_set` data table, then select (with the check boxes) all of the batches (`sample_set`) in your cohort. These should be the `sample_sets` that you used to run steps `03-TrainGCNV` through `08-FilterBatchSamples`. Then click the "Edit" icon above the table and choose "Save selection as set." Enter a name that follows the **Sample ID requirements**. 
This will create a new `sample_set_set` containing all of the `sample_sets` in your cohort. When you launch MergeBatchSites, you can now select this `sample_set_set`. - -selecting batches creating a new set -* If there is already a `sample_set_set` data table in your workspace, you can create this `sample_set_set` while you are launching the `09-MergeBatchSites` workflow: click "Select Data", choose "Create new sample_set_set [...]", check all the batches to include (all of the ones used in `03-TrainGCNV` through `08-FilterBatchSamples`), and give it a name that follows the **Sample ID requirements**. - -creating a cohort sample_set_set - -#### 10-GenotypeBatch - -Read the full GenotypeBatch documentation [here](https://github.com/broadinstitute/gatk-sv#genotype-batch). -* Use the same `sample_set` definitions you used for `03-TrainGCNV` through `08-FilterBatchSamples`. - -#### 11-RegenotypeCNVs, 12-CombineBatches, 13-ResolveComplexVariants, 14-GenotypeComplexVariants, 15-CleanVcf, 16-RefineComplexVariants, 17-JoinRawCalls, 18-SVConcordance, 19-FilterGenotypes, and 20-AnnotateVcf - -Read the full documentation for [RegenotypeCNVs](https://github.com/broadinstitute/gatk-sv#regenotype-cnvs), [MakeCohortVcf](https://github.com/broadinstitute/gatk-sv#make-cohort-vcf) (which includes `CombineBatches`, `ResolveComplexVariants`, `GenotypeComplexVariants`, `CleanVcf`), [`RefineComplexVariants`](https://github.com/broadinstitute/gatk-sv#refine-complex), [`JoinRawCalls`](https://github.com/broadinstitute/gatk-sv#join-raw-calls), [`SVConcordance`](https://github.com/broadinstitute/gatk-sv#svconcordance), [`FilterGenotypes`](https://github.com/broadinstitute/gatk-sv#filter-genotypes), and [AnnotateVcf](https://github.com/broadinstitute/gatk-sv#annotate-vcf) on the README. -* Use the same cohort `sample_set_set` you created and used for `09-MergeBatchSites`. - -#### Downstream steps - -Additional downstream steps are under development. +Please refer to the GATK-SV [website](https://broadinstitute.github.io/gatk-sv/) for documentation. +The [joint calling pipeline execution page](https://broadinstitute.github.io/gatk-sv/docs/execution/joint) contains detailed information on this Terra workspace and how to apply the pipeline to your own data. 
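As a companion to the workspace-setup instructions relocated above: the sample table upload requires the columns `entity:sample_id` and `bam_or_cram_file`, and the sample-set membership upload requires the columns `membership:sample_set_id` and `sample`. Below is a minimal sketch of generating a membership TSV, assuming a hypothetical `batches` mapping from batch IDs to sample IDs (real assignments should come from your batching process):

```python
import csv

# Hypothetical batch assignments; real batches are chosen using coverage,
# WGD score, and PCR status, per the batching documentation.
batches = {
    "batch1": ["HG00096", "HG00129"],
    "batch2": ["HG00140", "HG00150"],
}

with open("sample_set_membership.tsv", "w", newline="") as f:
    writer = csv.writer(f, delimiter="\t")
    # Terra expects this exact header for sample_set membership uploads
    writer.writerow(["membership:sample_set_id", "sample"])
    for batch_id, sample_ids in batches.items():
        for sample_id in sample_ids:
            writer.writerow([batch_id, sample_id])
```

The resulting file has one row per sample and can be uploaded through the Terra Data tab's "Upload TSV" dialog, the same path used for the sample table template below.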
diff --git a/inputs/templates/terra_workspaces/cohort_mode/samples_1kgp_156.tsv.tmpl b/inputs/templates/terra_workspaces/cohort_mode/samples_1kgp_156.tsv.tmpl new file mode 100644 index 000000000..10d19a18e --- /dev/null +++ b/inputs/templates/terra_workspaces/cohort_mode/samples_1kgp_156.tsv.tmpl @@ -0,0 +1,157 @@ +entity:sample_id bam_or_cram_file +HG00096 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG00096/analysis/HG00096.final.cram +HG00129 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG00129/analysis/HG00129.final.cram +HG00140 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG00140/analysis/HG00140.final.cram +HG00150 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG00150/analysis/HG00150.final.cram +HG00187 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG00187/analysis/HG00187.final.cram +HG00239 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG00239/analysis/HG00239.final.cram +HG00277 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG00277/analysis/HG00277.final.cram +HG00288 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG00288/analysis/HG00288.final.cram +HG00337 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG00337/analysis/HG00337.final.cram +HG00349 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG00349/analysis/HG00349.final.cram +HG00375 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG00375/analysis/HG00375.final.cram +HG00410 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG00410/analysis/HG00410.final.cram +HG00457 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG00457/analysis/HG00457.final.cram +HG00557 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG00557/analysis/HG00557.final.cram +HG00599 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG00599/analysis/HG00599.final.cram +HG00625 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG00625/analysis/HG00625.final.cram +HG00701 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG00701/analysis/HG00701.final.cram +HG00740 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG00740/analysis/HG00740.final.cram +HG00844 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG00844/analysis/HG00844.final.cram +HG01060 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG01060/analysis/HG01060.final.cram +HG01085 
gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG01085/analysis/HG01085.final.cram +HG01112 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG01112/analysis/HG01112.final.cram +HG01275 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG01275/analysis/HG01275.final.cram +HG01325 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG01325/analysis/HG01325.final.cram +HG01344 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG01344/analysis/HG01344.final.cram +HG01356 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG01356/analysis/HG01356.final.cram +HG01384 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG01384/analysis/HG01384.final.cram +HG01393 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG01393/analysis/HG01393.final.cram +HG01396 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG01396/analysis/HG01396.final.cram +HG01474 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG01474/analysis/HG01474.final.cram +HG01507 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG01507/analysis/HG01507.final.cram +HG01572 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG01572/analysis/HG01572.final.cram +HG01607 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG01607/analysis/HG01607.final.cram +HG01709 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG01709/analysis/HG01709.final.cram +HG01747 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG01747/analysis/HG01747.final.cram +HG01790 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG01790/analysis/HG01790.final.cram +HG01794 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG01794/analysis/HG01794.final.cram +HG01799 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG01799/analysis/HG01799.final.cram +HG01861 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG01861/analysis/HG01861.final.cram +HG01874 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG01874/analysis/HG01874.final.cram +HG01880 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG01880/analysis/HG01880.final.cram +HG01885 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG01885/analysis/HG01885.final.cram +HG01958 
gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG01958/analysis/HG01958.final.cram +HG01982 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG01982/analysis/HG01982.final.cram +HG02002 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG02002/analysis/HG02002.final.cram +HG02010 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG02010/analysis/HG02010.final.cram +HG02019 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG02019/analysis/HG02019.final.cram +HG02020 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG02020/analysis/HG02020.final.cram +HG02069 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG02069/analysis/HG02069.final.cram +HG02085 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG02085/analysis/HG02085.final.cram +HG02186 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG02186/analysis/HG02186.final.cram +HG02221 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG02221/analysis/HG02221.final.cram +HG02235 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG02235/analysis/HG02235.final.cram +HG02272 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG02272/analysis/HG02272.final.cram +HG02275 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG02275/analysis/HG02275.final.cram +HG02299 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG02299/analysis/HG02299.final.cram +HG02332 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG02332/analysis/HG02332.final.cram +HG02367 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG02367/analysis/HG02367.final.cram +HG02374 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG02374/analysis/HG02374.final.cram +HG02489 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG02489/analysis/HG02489.final.cram +HG02490 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG02490/analysis/HG02490.final.cram +HG02491 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG02491/analysis/HG02491.final.cram +HG02586 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG02586/analysis/HG02586.final.cram +HG02588 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG02588/analysis/HG02588.final.cram +HG02611 
gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG02611/analysis/HG02611.final.cram +HG02620 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG02620/analysis/HG02620.final.cram +HG02642 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG02642/analysis/HG02642.final.cram +HG02648 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG02648/analysis/HG02648.final.cram +HG02658 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG02658/analysis/HG02658.final.cram +HG02855 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG02855/analysis/HG02855.final.cram +HG02953 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG02953/analysis/HG02953.final.cram +HG03007 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG03007/analysis/HG03007.final.cram +HG03009 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG03009/analysis/HG03009.final.cram +HG03085 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG03085/analysis/HG03085.final.cram +HG03099 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG03099/analysis/HG03099.final.cram +HG03100 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG03100/analysis/HG03100.final.cram +HG03111 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG03111/analysis/HG03111.final.cram +HG03369 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG03369/analysis/HG03369.final.cram +HG03370 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG03370/analysis/HG03370.final.cram +HG03436 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG03436/analysis/HG03436.final.cram +HG03449 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG03449/analysis/HG03449.final.cram +HG03472 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG03472/analysis/HG03472.final.cram +HG03476 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG03476/analysis/HG03476.final.cram +HG03556 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG03556/analysis/HG03556.final.cram +HG03604 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG03604/analysis/HG03604.final.cram +HG03649 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG03649/analysis/HG03649.final.cram +HG03684 
gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG03684/analysis/HG03684.final.cram +HG03694 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG03694/analysis/HG03694.final.cram +HG03709 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG03709/analysis/HG03709.final.cram +HG03722 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG03722/analysis/HG03722.final.cram +HG03727 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG03727/analysis/HG03727.final.cram +HG03744 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG03744/analysis/HG03744.final.cram +HG03756 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG03756/analysis/HG03756.final.cram +HG03789 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG03789/analysis/HG03789.final.cram +HG03850 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG03850/analysis/HG03850.final.cram +HG03864 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG03864/analysis/HG03864.final.cram +HG03872 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG03872/analysis/HG03872.final.cram +HG03888 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG03888/analysis/HG03888.final.cram +HG04118 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG04118/analysis/HG04118.final.cram +HG04158 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG04158/analysis/HG04158.final.cram +HG04161 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG04161/analysis/HG04161.final.cram +HG04183 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG04183/analysis/HG04183.final.cram +NA06984 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_NA06984/analysis/NA06984.final.cram +NA10847 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_NA10847/analysis/NA10847.final.cram +NA11894 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_NA11894/analysis/NA11894.final.cram +NA12340 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_NA12340/analysis/NA12340.final.cram +NA12489 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_NA12489/analysis/NA12489.final.cram +NA12872 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_NA12872/analysis/NA12872.final.cram +NA18499 
gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_NA18499/analysis/NA18499.final.cram +NA18507 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_NA18507/analysis/NA18507.final.cram +NA18530 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_NA18530/analysis/NA18530.final.cram +NA18539 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_NA18539/analysis/NA18539.final.cram +NA18549 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_NA18549/analysis/NA18549.final.cram +NA18553 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_NA18553/analysis/NA18553.final.cram +NA18560 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_NA18560/analysis/NA18560.final.cram +NA18638 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_NA18638/analysis/NA18638.final.cram +NA18923 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_NA18923/analysis/NA18923.final.cram +NA18941 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_NA18941/analysis/NA18941.final.cram +NA18945 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_NA18945/analysis/NA18945.final.cram +NA18956 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_NA18956/analysis/NA18956.final.cram +NA18995 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_NA18995/analysis/NA18995.final.cram +NA19001 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_NA19001/analysis/NA19001.final.cram +NA19035 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_NA19035/analysis/NA19035.final.cram +NA19062 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_NA19062/analysis/NA19062.final.cram +NA19102 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_NA19102/analysis/NA19102.final.cram +NA19143 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_NA19143/analysis/NA19143.final.cram +NA19184 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_NA19184/analysis/NA19184.final.cram +NA19350 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_NA19350/analysis/NA19350.final.cram +NA19351 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_NA19351/analysis/NA19351.final.cram +NA19377 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_NA19377/analysis/NA19377.final.cram +NA19443 
gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_NA19443/analysis/NA19443.final.cram +NA19449 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_NA19449/analysis/NA19449.final.cram +NA19661 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_NA19661/analysis/NA19661.final.cram +NA19678 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_NA19678/analysis/NA19678.final.cram +NA19679 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_NA19679/analysis/NA19679.final.cram +NA19684 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_NA19684/analysis/NA19684.final.cram +NA19746 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_NA19746/analysis/NA19746.final.cram +NA19795 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_NA19795/analysis/NA19795.final.cram +NA19818 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_NA19818/analysis/NA19818.final.cram +NA19913 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_NA19913/analysis/NA19913.final.cram +NA20126 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_NA20126/analysis/NA20126.final.cram +NA20320 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_NA20320/analysis/NA20320.final.cram +NA20321 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_NA20321/analysis/NA20321.final.cram +NA20346 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_NA20346/analysis/NA20346.final.cram +NA20509 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_NA20509/analysis/NA20509.final.cram +NA20510 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_NA20510/analysis/NA20510.final.cram +NA20522 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_NA20522/analysis/NA20522.final.cram +NA20752 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_NA20752/analysis/NA20752.final.cram +NA20764 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_NA20764/analysis/NA20764.final.cram +NA20802 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_NA20802/analysis/NA20802.final.cram +NA20845 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_NA20845/analysis/NA20845.final.cram +NA20869 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_NA20869/analysis/NA20869.final.cram +NA20895 
gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_NA20895/analysis/NA20895.final.cram +NA21102 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_NA21102/analysis/NA21102.final.cram +NA21122 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_NA21122/analysis/NA21122.final.cram +NA21133 gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_NA21133/analysis/NA21133.final.cram diff --git a/inputs/templates/terra_workspaces/cohort_mode/samples_1kgp.tsv.tmpl b/inputs/templates/terra_workspaces/cohort_mode/samples_1kgp_312.tsv.tmpl similarity index 100% rename from inputs/templates/terra_workspaces/cohort_mode/samples_1kgp.tsv.tmpl rename to inputs/templates/terra_workspaces/cohort_mode/samples_1kgp_312.tsv.tmpl diff --git a/inputs/templates/terra_workspaces/cohort_mode/workflow_configurations/VisualizeCnvs.json.tmpl b/inputs/templates/terra_workspaces/cohort_mode/workflow_configurations/VisualizeCnvs.json.tmpl new file mode 100644 index 000000000..5826cb05f --- /dev/null +++ b/inputs/templates/terra_workspaces/cohort_mode/workflow_configurations/VisualizeCnvs.json.tmpl @@ -0,0 +1,10 @@ +{ + "VisualizeCnvs.vcf_or_bed": "${this.filtered_vcf}", + "VisualizeCnvs.prefix": "${this.sample_set_set_id}", + "VisualizeCnvs.median_files": "${this.sample_sets.median_cov}", + "VisualizeCnvs.rd_files": "${this.sample_sets.merged_bincov}", + "VisualizeCnvs.ped_file": "${workspace.cohort_ped_file}", + "VisualizeCnvs.min_size": 50000, + "VisualizeCnvs.flags": "-s 999999999", + "VisualizeCnvs.sv_pipeline_docker": "${workspace.sv_pipeline_docker}" +} \ No newline at end of file diff --git a/inputs/templates/test/VisualizeCnvs/VisualizeCnvs.json.tmpl b/inputs/templates/test/VisualizeCnvs/VisualizeCnvs.json.tmpl index e4214c855..c4f4609da 100644 --- a/inputs/templates/test/VisualizeCnvs/VisualizeCnvs.json.tmpl +++ b/inputs/templates/test/VisualizeCnvs/VisualizeCnvs.json.tmpl @@ -5,6 +5,6 @@ "VisualizeCnvs.rd_files": [{{ test_batch.merged_coverage_file | tojson }}], "VisualizeCnvs.ped_file": {{ test_batch.ped_file | tojson }}, "VisualizeCnvs.min_size": 50000, - "VisualizeCnvs.flags": "", + "VisualizeCnvs.flags": "-s 999999999", "VisualizeCnvs.sv_pipeline_docker": {{ dockers.sv_pipeline_docker | tojson }} } \ No newline at end of file diff --git a/scripts/test/terra_validation.py b/scripts/test/terra_validation.py index aa9079d19..5a4893bf7 100644 --- a/scripts/test/terra_validation.py +++ b/scripts/test/terra_validation.py @@ -113,7 +113,7 @@ def main(): parser.add_argument("-j", "--womtool-jar", help="Path to womtool jar", required=True) parser.add_argument("-n", "--num-input-jsons", help="Number of Terra input JSONs expected", - required=False, default=25, type=int) + required=False, default=26, type=int) parser.add_argument("--log-level", help="Specify level of logging information, ie. info, warning, error (not case-sensitive)", required=False, default="INFO") diff --git a/wdl/CleanVcfChromosome.wdl b/wdl/CleanVcfChromosome.wdl index a14ffa8c4..5800be909 100644 --- a/wdl/CleanVcfChromosome.wdl +++ b/wdl/CleanVcfChromosome.wdl @@ -53,6 +53,7 @@ workflow CleanVcfChromosome { RuntimeAttr? runtime_override_stitch_fragmented_cnvs RuntimeAttr? runtime_override_final_cleanup RuntimeAttr? runtime_override_rescue_me_dels + RuntimeAttr? 
runtime_attr_add_high_fp_rate_filters

   # Clean vcf 1b
   RuntimeAttr? runtime_attr_override_subset_large_cnvs_1b
@@ -299,9 +300,17 @@ workflow CleanVcfChromosome {
       runtime_attr_override = runtime_override_rescue_me_dels
   }
 
-  call FinalCleanup {
+  call AddHighFDRFilters {
     input:
       vcf=RescueMobileElementDeletions.out,
+      prefix="~{prefix}.high_fdr_filtered",
+      sv_pipeline_docker=sv_pipeline_docker,
+      runtime_attr_override=runtime_attr_add_high_fp_rate_filters
+  }
+
+  call FinalCleanup {
+    input:
+      vcf=AddHighFDRFilters.out,
       contig=contig,
       prefix="~{prefix}.final_cleanup",
       sv_pipeline_docker=sv_pipeline_docker,
@@ -799,6 +808,58 @@ task StitchFragmentedCnvs {
   }
 }
 
+# Add FILTER status for pockets of variants with high FP rate: wham-only DELs and Scramble-only SVAs with HIGH_SR_BACKGROUND
+task AddHighFDRFilters {
+  input {
+    File vcf
+    String prefix
+    String sv_pipeline_docker
+    RuntimeAttr? runtime_attr_override
+  }
+
+  Float input_size = size(vcf, "GiB")
+  RuntimeAttr runtime_default = object {
+    mem_gb: 3.75,
+    disk_gb: ceil(10.0 + input_size * 3.0),
+    cpu_cores: 1,
+    preemptible_tries: 3,
+    max_retries: 1,
+    boot_disk_gb: 10
+  }
+  RuntimeAttr runtime_override = select_first([runtime_attr_override, runtime_default])
+  runtime {
+    memory: "~{select_first([runtime_override.mem_gb, runtime_default.mem_gb])} GB"
+    disks: "local-disk ~{select_first([runtime_override.disk_gb, runtime_default.disk_gb])} HDD"
+    cpu: select_first([runtime_override.cpu_cores, runtime_default.cpu_cores])
+    preemptible: select_first([runtime_override.preemptible_tries, runtime_default.preemptible_tries])
+    maxRetries: select_first([runtime_override.max_retries, runtime_default.max_retries])
+    docker: sv_pipeline_docker
+    bootDiskSizeGb: select_first([runtime_override.boot_disk_gb, runtime_default.boot_disk_gb])
+  }
+
+  command <<<
+    set -euo pipefail
+
+    python <<CODE
+import pysam
+with pysam.VariantFile("~{vcf}") as fin:
+    header = fin.header
+    header.add_line("##FILTER=<ID=HIGH_ALGORITHM_FDR,Description=\"Variant categories with high false discovery rates: wham-only deletions and Scramble-only SVAs with high split-read background\">")
+    with pysam.VariantFile("~{prefix}.vcf.gz", 'w', header=header) as fo:
+        for record in fin:
+            if (record.info['ALGORITHMS'] == ('wham',) and record.info['SVTYPE'] == 'DEL') or \
+                (record.info['ALGORITHMS'] == ('scramble',) and record.info.get('HIGH_SR_BACKGROUND', False) and record.alts == ('<INS:ME:SVA>',)):
+                record.filter.add('HIGH_ALGORITHM_FDR')
+            fo.write(record)
+CODE
+  >>>
+
+  output {
+    File out = "~{prefix}.vcf.gz"
+  }
+}
+
+
 # Final VCF cleanup
 task FinalCleanup {
diff --git a/website/.gitignore b/website/.gitignore
index b2d6de306..9c0d0c366 100644
--- a/website/.gitignore
+++ b/website/.gitignore
@@ -7,6 +7,7 @@
 # Generated files
 .docusaurus
 .cache-loader
+package-lock.json
 
 # Misc
 .DS_Store
diff --git a/website/docs/advanced/cromwell/overview.md b/website/docs/advanced/cromwell/overview.md
index 84ff94ae4..602edb9b2 100644
--- a/website/docs/advanced/cromwell/overview.md
+++ b/website/docs/advanced/cromwell/overview.md
@@ -29,7 +29,7 @@ Google Cloud Platform (GCP).
 
 # Cromwell Server
 
-There are two option to communicate with a running Cromwell server:
+There are two options to communicate with a running Cromwell server:
 [REST API](https://cromwell.readthedocs.io/en/stable/tutorials/ServerMode/), and
 [Cromshell](https://github.com/broadinstitute/cromshell) which is
 a command line tool to interface with a Cromwell server.
We recommend using Cromshell due to its simplicity diff --git a/website/docs/best_practices.md b/website/docs/best_practices.md index 4c0695d58..6cd86120d 100644 --- a/website/docs/best_practices.md +++ b/website/docs/best_practices.md @@ -4,8 +4,8 @@ description: Guide for using GATK-SV sidebar_position: 4 --- -A comprehensive guide for the single-sample calling mode is available in [GATK Best Practices for Structural Variation -Discovery on Single Samples](https://gatk.broadinstitute.org/hc/en-us/articles/9022653744283-GATK-Best-Practices-for-Structural-Variation-Discovery-on-Single-Samples). +A comprehensive guide for the single-sample [calling mode](/docs/gs/calling_modes) is available in +[GATK Best Practices for Structural Variation Discovery on Single Samples](https://gatk.broadinstitute.org/hc/en-us/articles/9022653744283-GATK-Best-Practices-for-Structural-Variation-Discovery-on-Single-Samples). This material covers basic concepts of structural variant calling, specifics of SV VCF formatting, and advanced troubleshooting that also apply to the joint calling mode. This guide is intended to supplement documentation found here. diff --git a/website/docs/execution/joint.md b/website/docs/execution/joint.md index 176dfbd07..fdb0378a1 100644 --- a/website/docs/execution/joint.md +++ b/website/docs/execution/joint.md @@ -11,9 +11,8 @@ which is configured with a demo sample set. Refer to the following sections for instructions on how to run the pipeline on your data using this workspace. ### Default data -The demonstration data in this workspace is 312 publicly-available 1000 Genomes Project samples from the -[NYGC/AnVIL high coverage data set](https://app.terra.bio/#workspaces/anvil-datastorage/1000G-high-coverage-2019), -divided into two equally-sized batches. +The demonstration data in this workspace consists of 156 publicly-available 1000 Genomes Project samples from the +[NYGC/AnVIL high coverage data set](https://app.terra.bio/#workspaces/anvil-datastorage/1000G-high-coverage-2019). ## Pipeline Expectations ### What does it do? @@ -21,16 +20,16 @@ This pipeline performs structural variation discovery from CRAMs, joint genotypi of samples. ### Required inputs +Refer to the [Input Data section](/docs/gs/inputs) for details on file formats, sample QC, and sample ID restrictions. + The following inputs must be provided for each sample in the cohort, via the sample table described in **Workspace Setup** step 2: |Input Type|Input Name|Description| |---------|--------|--------------| -|`String`|`sample_id`|Case sample identifier*| +|`String`|`sample_id`|Case sample identifier| |`File`|`bam_or_cram_file`|Path to the GCS location of the input CRAM or BAM file.| -*See **Sample ID requirements** below for specifications. - The following cohort-level or batch-level inputs are also required: |Input Type|Input Name|Description| @@ -54,34 +53,35 @@ The following are the main pipeline outputs. For more information on the outputs ### Pipeline overview -pipeline_diagram +pipeline_diagram -The following workflows are included in this workspace, to be executed in this order: +The following workflows and Jupyter notebooks are included in this workspace, to be executed in this order: 1. `01-GatherSampleEvidence`: Per-sample SV evidence collection, including calls from a configurable set of algorithms (Manta, MELT, and Wham), read depth (RD), split read positions (SR), and discordant pair positions (PE). 2. `02-EvidenceQC`: Dosage bias scoring and ploidy estimation, run on preliminary batches -3.
`03-TrainGCNV`: Per-batch training of a gCNV model for use in `04-GatherBatchEvidence` -4. `04-GatherBatchEvidence`: Per-batch copy number variant calling using cn.MOPS and GATK gCNV; B-allele frequency (BAF) +3. [Notebook] `SampleQC.ipynb`: Interactively perform sample QC and filtering using outputs from `02-EvidenceQC` +4. [Notebook] `Batching.ipynb`: Create batches for subsequent steps; recommended for cohorts of more than 500 samples and for smaller heterogeneous cohorts +5. `03-TrainGCNV`: Per-batch training of a gCNV model for use in `04-GatherBatchEvidence` +6. `04-GatherBatchEvidence`: Per-batch copy number variant calling using cn.MOPS and GATK gCNV; B-allele frequency (BAF) generation; call and evidence aggregation -5. `05-ClusterBatch`: Per-batch variant clustering -6. `06-GenerateBatchMetrics`: Per-batch variant filtering, metric generation -7. `07-FilterBatchSites`: Per-batch variant filtering and plot SV counts per sample per SV type to enable choice of IQR +7. `05-ClusterBatch`: Per-batch variant clustering +8. `06-GenerateBatchMetrics`: Per-batch variant filtering, metric generation +9. `07-FilterBatchSites`: Per-batch variant filtering and plotting of SV counts per sample per SV type to enable choice of an IQR cutoff for outlier filtration in `08-FilterBatchSamples` (see the sketch below) -8. `08-FilterBatchSamples`: Per-batch outlier sample filtration -9. `09-MergeBatchSites`: Site merging of SVs discovered across batches, run on a cohort-level `sample_set_set` -10. `10-GenotypeBatch`: Per-batch genotyping of all sites in the cohort -11. `11-RegenotypeCNVs`: Cohort-level genotype refinement of some depth calls -12. `12-CombineBatches`: Cohort-level cross-batch integration and clustering -13. `13-ResolveComplexVariants`: Complex variant resolution -14. `14-GenotypeComplexVariants`: Complex variant re-genotyping -15. `15-CleanVcf`: VCF cleanup -16. `16-RefineComplexVariants`: Complex variant filtering and refinement -17. `17-ApplyManualVariantFilter`: Hard filtering high-FP SV classes -18. `18-JoinRawCalls`: Raw call aggregation -19. `19-SVConcordance`: Annotate genotype concordance with raw calls -20. `20-FilterGenotypes`: Genotype filtering -21. `21-AnnotateVcf`: Cohort VCF annotations, including functional annotation, allele frequency (AF) annotation, and +10. `08-FilterBatchSamples`: Per-batch outlier sample filtration +11. `09-MergeBatchSites`: Site merging of SVs discovered across batches, run on a cohort-level `sample_set_set` +12. `10-GenotypeBatch`: Per-batch genotyping of all sites in the cohort +13. `11-RegenotypeCNVs`: Cohort-level genotype refinement of some depth calls +14. `12-CombineBatches`: Cohort-level cross-batch integration and clustering +15. `13-ResolveComplexVariants`: Complex variant resolution +16. `14-GenotypeComplexVariants`: Complex variant re-genotyping +17. `15-CleanVcf`: VCF cleanup +18. `16-RefineComplexVariants`: Complex variant filtering and refinement +19. `17-JoinRawCalls`: Raw call aggregation +20. `18-SVConcordance`: Annotate genotype concordance with raw calls +21. `19-FilterGenotypes`: Genotype filtering +22. `20-AnnotateVcf`: Cohort VCF annotations, including functional annotation, allele frequency (AF) annotation, and AF annotation with external population callsets Extra workflows (Not part of canonical pipeline, but included for your convenience. May require manual configuration): @@ -115,13 +115,12 @@ with average costs ranging between $2-$3 per sample. For instance, PCR+ samples of improperly paired reads have been observed to cost more.
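The IQR-based outlier screen in steps 9-10 above can be previewed before committing to a cutoff. The sketch below assumes a per-sample SV count table; the column layout, file name, and upper-fence convention are illustrative, and the pipeline's own `outlier_cutoff_nIQR` definition governs the real filter.

```python
# Illustrative preview of IQR-based outlier sample detection from SV counts.
# Assumes a TSV with one row per sample and one column per SV type (e.g. DEL, DUP, INS);
# the exact cutoff convention used by the pipeline may differ.
import pandas as pd

def outlier_samples(counts: pd.DataFrame, n_iqr: float = 6.0) -> pd.Index:
    q1, q3 = counts.quantile(0.25), counts.quantile(0.75)
    upper = q3 + n_iqr * (q3 - q1)          # per-SV-type upper fence
    flagged = (counts > upper).any(axis=1)  # outlier in any SV type
    return counts.index[flagged]

# counts = pd.read_table("sv_counts.tsv", index_col="sample")
# print(outlier_samples(counts, n_iqr=6.0).tolist())
```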
Consider [excluding low-quality samples](/docs/gs/inputs#sample-exclusion) prior to processing to keep costs low. -### Sample ID format - -Refer to [the sample ID requirements section](/docs/gs/inputs#sampleids) of the documentation. ### Workspace setup -1. Clone this workspace into a Terra project to which you have access +1. Clone this workspace into a Terra project to which you have access. Select `us-central1` for the workspace region. + If you must use a different region, you will need to copy all GATK-SV docker images to the other region + before running the pipeline. See the [docker images section](/docs/gs/dockers#regions-important) for details. 2. In your new workspace, delete the example data. To do this, go to the *Data* tab of the workspace. Delete the data tables in this order: `sample_set_set`, `sample_set`, and `sample`. For each table, click the 3 dots icon to the @@ -132,7 +131,7 @@ Refer to [the sample ID requirements section](/docs/gs/inputs#sampleids) of the per sample, as well as a header (first) line. It should contain the columns `entity:sample_id` (first column) and `bam_or_cram_file` at minimum. See the **Required inputs** section above for more information on these inputs. For an example sample data table, refer to the sample data table for the 1000 Genomes samples in this workspace - [here in the GATK-SV GitHub repository](https://github.com/broadinstitute/gatk-sv/blob/master/input_templates/terra_workspaces/cohort_mode/samples_1kgp.tsv.tmpl). + [here in the GATK-SV GitHub repository](https://github.com/broadinstitute/gatk-sv/blob/main/inputs/templates/terra_workspaces/cohort_mode/samples_1kgp_156.tsv.tmpl). To upload the TSV file, navigate to the *Data* tab of the workspace, click the `Import Data` button on the top left, and select "Upload TSV". uploading a TSV data table @@ -144,12 +143,13 @@ Refer to [the sample ID requirements section](/docs/gs/inputs#sampleids) of the ### Creating sample_sets -To create batches (in the `sample_set` table), the easiest way is to upload a tab-separated sample set membership file. +To create batches (in the `sample_set` table), we recommend using the `Batching.ipynb` notebook (see [batching](#batching)). +To create batches manually, the easiest way is to upload a tab-separated sample set membership file. This file should have one line per sample, plus a header (first) line. The first column should be `membership:sample_set_id` (containing the `sample_set_id` for the sample in question), and the second should be `sample` (containing the sample IDs). Recall that batch IDs (`sample_set_id`) should follow the -[sample ID requirements](/docs/gs/inputs#sampleids). For an example sample membership file, refer to the one for the -1000 Genomes samples in this workspace [here in the GATK-SV GitHub repository](https://github.com/broadinstitute/gatk-sv/blob/master/input_templates/terra_workspaces/cohort_mode/sample_set_membership_1kgp.tsv.tmpl). +[sample ID requirements](/docs/gs/inputs#sampleids). For an example sample set membership file, refer to +[this one in the GATK-SV GitHub repository](https://github.com/broadinstitute/gatk-sv/blob/main/inputs/templates/terra_workspaces/cohort_mode/sample_set_membership_1kgp.tsv.tmpl). ## Workflow instructions {#instructions} @@ -196,20 +196,8 @@ partitions. Make sure to try just one sample first though! * Refer to the [Input Data section](/docs/gs/inputs) for details on file formats, sample QC, and sample ID restrictions. 
* It is normal for a few samples in a cohort to run out of memory during Wham SV calling, so we recommend enabling auto-retry for out-of-memory errors for `01-GatherSampleEvidence` only. Before you launch the workflow, click the -checkbox reading "Retry with more memory" and set the memory retry factor to 1.8. This action must be performed each +checkbox reading "Retry with more memory" and set the memory retry factor to 2. This action must be performed each time you launch a `01-GatherSampleEvidence` job. -* Please note that most large published joint call sets produced by GATK-SV, including gnomAD-SV, included the tool -MELT, a state-of-the-art mobile element insertion (MEI) detector, as part of the pipeline. Due to licensing -restrictions, we cannot provide a public docker image for this algorithm. The `01-GatherSampleEvidence` workflow -does not use MELT as one of the SV callers by default, which will result in less sensitivity to MEI calls. In order -to use MELT, you will need to build your own private docker image (example Dockerfile -[here](https://github.com/broadinstitute/gatk-sv/blob/master/dockerfiles/melt/Dockerfile)), share it with your Terra -proxy account, enter it in the `melt_docker` input in the `01-GatherSampleEvidence` configuration (as a string, -surrounded by double-quotes), and then click "Save". No further changes are necessary beyond `01-GatherSampleEvidence`. - * Note that the version of MELT tested with GATK-SV is v2.0.5. If you use a different version to create your own - docker image, we recommend testing your image by running one pilot sample through `01-GatherSampleEvidence` to check - that it runs as expected, then running a small group of about 10 pilot samples through the pipeline until the end of - `04-GatherBatchEvidence` to check that the outputs are compatible with GATK-SV. * If you enable "Delete intermediate outputs" whenever you launch this workflow (recommended), BAM files will be deleted for successful runs; but BAM files will not be deleted if the run fails or if intermediate file deletion is not enabled. Since BAM files are large, we recommend deleting them to save on storage costs, but only after fixing and @@ -220,37 +208,27 @@ re-running the failed workflow, so that it will call-cache. Read the full EvidenceQC documentation [here](/docs/modules/eqc). * `02-EvidenceQC` is run on arbitrary cohort partitions of up to 500 samples. -* The outputs from `02-EvidenceQC` can be used for -[preliminary sample QC](/docs/modules/eqc#preliminary-sample-qc) and +* The outputs from `02-EvidenceQC` can be used for [sample QC](#sample-qc) and [batching](#batching) before moving on to [TrainGCNV](#traingcnv). -### Batching (manual step) {#batching} +### Sample QC (notebook) {#sample-qc} +Read the documentation on preliminary sample QC [here](/docs/modules/eqc#preliminary-sample-qc). +Follow the `SampleQC.ipynb` notebook step-by-step to evaluate sample data quality and remove low-quality samples as needed. +The notebook will produce a table of passing samples to use for [batching](#batching). -For larger cohorts, samples should be split up into batches of about 100-500 -samples with similar characteristics. We recommend batching based on overall -coverage and dosage score (WGD), which can be generated in [EvidenceQC](/docs/modules/eqc). -An example batching process is outlined below: -1. Divide the cohort into PCR+ and PCR- samples -2. 
Partition the samples by median coverage from [EvidenceQC](/docs/modules/eqc), - grouping samples with similar median coverage together. The end goal is to - divide the cohort into roughly equal-sized batches of about 100-500 samples; - if your partitions based on coverage are larger or uneven, you can partition - the cohort further in the next step to obtain the final batches. -3. Optionally, divide the samples further by dosage score (WGD) from - [EvidenceQC](/docs/modules/eqc), grouping samples with similar WGD score - together, to obtain roughly equal-sized batches of about 100-500 samples -4. Maintain a roughly equal sex balance within each batch, based on sex - assignments from [EvidenceQC](/docs/modules/eqc) +### Batching (notebook) {#batching} +Read the documentation on batching [here](/docs/modules/eqc#batching). +If necessary, follow the `Batching.ipynb` notebook step-by-step to divide samples into batches +and create corresponding `sample_sets` for use in `03-TrainGCNV` and beyond. ### 03-TrainGCNV {#traingcnv} Read the full TrainGCNV documentation [here](/docs/modules/gcnv). -* Before running this workflow, create the batches (~100-500 samples) you will use for the rest of the pipeline based -on sample coverage, WGD score (from `02-EvidenceQC`), and PCR status. These will likely not be the same as the batches -you used for `02-EvidenceQC`. +* Before running this workflow, create the batches (~100-500 samples) you will use for the rest of the pipeline according +to the [batching](#batching) instructions. These will likely not be the same as the batches you used for `02-EvidenceQC`. * By default, `03-TrainGCNV` is configured to be run once per `sample_set` on 100 randomly-chosen samples from that set to create a gCNV model for each batch. To modify this behavior, you can set the `n_samples_subsample` parameter to the number of samples to use for training. @@ -309,15 +287,10 @@ that follows the [sample ID requirements](/docs/gs/inputs#sampleids). Read the full GenotypeBatch documentation [here](/docs/modules/gb). * Use the same `sample_set` definitions you used for `03-TrainGCNV` through `08-FilterBatchSamples`. -### Steps 11-17 +### Steps 11-20 -Read the full documentation for [RegenotypeCNVs](/docs/modules/rgcnvs), [MakeCohortVcf](/docs/modules/cvcf) (which -includes `CombineBatches`, `ResolveComplexVariants`, `GenotypeComplexVariants`, `CleanVcf`, `MainVcfQc`), and -[AnnotateVcf](/docs/modules/av). +Read the full documentation for [RegenotypeCNVs](/docs/modules/rgcnvs), [CombineBatches](/docs/modules/cmb), +[ResolveComplexVariants](/docs/modules/rcv), [GenotypeComplexVariants](/docs/modules/gcv), [CleanVcf](/docs/modules/cvcf), +[RefineComplexVariants](/docs/modules/refcv), [JoinRawCalls](/docs/modules/jrc), [SVConcordance](/docs/modules/svc), +[FilterGenotypes](/docs/modules/fg), and [AnnotateVcf](/docs/modules/av). * Use the same cohort `sample_set_set` you created and used for `09-MergeBatchSites`. - -### Additional notes - -- The VCF produced by `15-CleanVcf` (and annotated by `17-AnnotateVcf`) prioritizes sensitivity, but additional downstream -filtration is recommended to improve specificity. - diff --git a/website/docs/execution/single.md b/website/docs/execution/single.md index 1132a2ac3..a28aa793f 100644 --- a/website/docs/execution/single.md +++ b/website/docs/execution/single.md @@ -103,7 +103,9 @@ inputs beyond their defaults. 
If you would like to run this workflow on your own samples (which must be medium-to-high coverage WGS data): -- Clone the [workspace](https://app.terra.bio/#workspaces/help-gatk/GATK-Structural-Variants-Single-Sample) into a Terra project you have access to +- Clone the [workspace](https://app.terra.bio/#workspaces/help-gatk/GATK-Structural-Variants-Single-Sample) into a Terra project you have access to. + Select `us-central1` for the region. If you must use a different region, you will need to copy all GATK-SV docker images to the other region + before running the pipeline. See the [docker images section](/docs/gs/dockers#regions-important) for details. - In the cloned workspace, upload rows to the Sample and (optionally) the Participant Data Table that describe your samples. Ensure that the rows you add to the Sample table contain the columns `sample_id` and `bam_or_cram_file`, and that these are populated appropriately. - There is no need to modify values in the workspace data or method configuration. If you are interested in modifying the reference diff --git a/website/docs/gs/calling_modes.md b/website/docs/gs/calling_modes.md index dcafa5634..df8b4651b 100644 --- a/website/docs/gs/calling_modes.md +++ b/website/docs/gs/calling_modes.md @@ -20,7 +20,8 @@ use cases: Users should also consider that the single-sample mode is provided as a single workflow and is therefore considerably simpler to run than joint calling. However, it also has higher compute costs on a per-sample basis and will not be as sensitive -as joint calling with larger cohorts. +as joint calling with larger cohorts. Additionally, SV quality will be best when the case sample closely resembles the samples +in the reference panel in terms of sequencing depth, sample quality, and library preparation. ## Joint calling mode diff --git a/website/docs/intro.md b/website/docs/intro.md index 413f28245..a0e17b3fa 100644 --- a/website/docs/intro.md +++ b/website/docs/intro.md @@ -5,14 +5,14 @@ sidebar_position: 1 --- GATK-SV is a comprehensive, cloud-based ensemble pipeline for discovering and annotating all -classes of structural variants (SV) from whole genome sequencing (WGS) data. It can detect +classes of structural variants (SV) from short-read whole genome sequencing (WGS) data. It can detect deletions, duplications, multi-allelic copy number variants, balanced inversions, insertions, translocations, and a diverse spectrum of complex SV. Briefly, GATK-SV maximizes the sensitivity of SV discovery by harmonizing output from five tools: -Manta, Wham, Scramble, cnMOPS, and GATK-gCNV. To minimize false positives, raw SVs -are adjudicated and re-genotyped from read evidence considering all potential +Manta, Wham, Scramble, cn.MOPS, and GATK-gCNV. To minimize false positives, raw SVs +are adjudicated and re-genotyped, considering all potential sequencing evidence including anomalous paired-end (PE) reads, split reads (SR), -read-depth (RD), and B-allele frequencies (BAF). It also fully resolves 16 classes of complex +read-depth (RD), and B-allele frequencies (BAF). It also fully resolves 11 classes of complex SVs composed of multiple breakpoints. GATK-SV is intended for use on the [Terra](https://app.terra.bio/) platform. diff --git a/website/docs/modules/annotate_vcf.md b/website/docs/modules/annotate_vcf.md index 1ec30cc47..b0df24e5c 100644 --- a/website/docs/modules/annotate_vcf.md +++ b/website/docs/modules/annotate_vcf.md @@ -75,7 +75,7 @@ If provided, sex-specific allele frequencies will be annotated.
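As a concrete illustration of what sex-specific AF annotation computes, the toy sketch below tallies allele frequencies by sex with pysam. The sample-to-sex map, file handling, and printout are assumptions for illustration, not the module's actual implementation.

```python
# Toy sketch: compute sex-specific allele frequencies from a VCF.
# Assumes sample_sex maps sample ID -> "MALE"/"FEMALE", e.g. parsed from a PED file.
import pysam

def print_sex_specific_af(vcf_path: str, sample_sex: dict) -> None:
    with pysam.VariantFile(vcf_path) as vcf:
        for record in vcf:
            tallies = {"MALE": [0, 0], "FEMALE": [0, 0]}  # [alt alleles, called alleles]
            for sample, call in record.samples.items():
                sex = sample_sex.get(sample)
                if sex not in tallies:
                    continue
                for allele in (call.get("GT") or ()):
                    if allele is None:
                        continue  # skip no-call alleles
                    tallies[sex][0] += int(allele > 0)
                    tallies[sex][1] += 1
            afs = {s: (alt / n if n else 0.0) for s, (alt, n) in tallies.items()}
            print(record.id, afs["MALE"], afs["FEMALE"])
```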
Pseudo-autosomal region (PAR) bed file. If provided, variants overlapping PARs will be annotated with the `PAR` field. #### `sv_per_shard` -Shard sized for parallel processing. Decreasing this may help if the workflow is running too slowly. +Shard size for parallel processing. Decreasing this may help if the workflow is running too slowly. #### Optional `external_af_ref_bed` Reference SV set (see [here](/docs/resources#external_af_ref_bed)). If provided, annotates variants with allele frequencies diff --git a/website/docs/modules/apply_manual_filter.md b/website/docs/modules/apply_manual_filter.md deleted file mode 100644 index 6c9bd7058..000000000 --- a/website/docs/modules/apply_manual_filter.md +++ /dev/null @@ -1,57 +0,0 @@ ---- -title: ApplyManualVariantFilter -description: Complex SV genotyping -sidebar_position: 16 -slug: amvf ---- - -[WDL source code](https://github.com/broadinstitute/gatk-sv/blob/main/wdl/ApplyManualVariantFilter.wdl) - -This module hard-filters variants (dropping records) using [bcftools](https://github.com/samtools/bcftools). While the -workflow is general-purpose, we recommend running it with default parameters to eliminate major sources of false -positive variants: - -1. Deletions called solely by `Wham`. -2. SVA MEIs called by `Scramble` with the `HIGH_SR_BACKGROUND` flag. - -The following diagram illustrates the recommended invocation order: - -```mermaid - -stateDiagram - direction LR - - classDef inModules stroke-width:0px,fill:#caf0f8,color:#00509d - classDef thisModule font-weight:bold,stroke-width:0px,fill:#ff9900,color:white - classDef outModules stroke-width:0px,fill:#caf0f8,color:#00509d - - refcv: RefineComplexVariants - amvf: ApplyManualVariantFilter - svc: SVConcordance - refcv --> amvf - amvf --> svc - - class amvf thisModule - class refcv inModules - class svc outModules -``` - -### Inputs - -#### `prefix` -Prefix for the output VCF, such as the cohort name. May be alphanumeric with underscores. - -#### `vcf` -Any VCF. Running on the [cleaned VCF](cvcf#cleaned_vcf) is recommended. - -#### `filter_name` -A name for the filter, used for output file naming. May be alphanumeric with underscores. - -#### `bcftools_filter` -[Bcftools EXPRESSION](https://samtools.github.io/bcftools/bcftools.html#expressions) to use for filtering. Variants -matching this expression will be **excluded**, i.e. with the `-e` argument. - -### Outputs - -#### `manual_filtered_vcf` -Filtered VCF. diff --git a/website/docs/modules/concordance.md b/website/docs/modules/concordance.md index 16e8814ec..b30ef86ed 100644 --- a/website/docs/modules/concordance.md +++ b/website/docs/modules/concordance.md @@ -25,16 +25,16 @@ stateDiagram classDef thisModule font-weight:bold,stroke-width:0px,fill:#ff9900,color:white classDef outModules stroke-width:0px,fill:#caf0f8,color:#00509d - amvf: ApplyManualVariantFilter + refcv: RefineComplexVariants jrc: JoinRawCalls svc: SVConcordance fg: FilterGenotypes - amvf --> svc + refcv --> svc jrc --> svc svc --> fg class svc thisModule - class amvf inModules + class refcv inModules class jrc inModules class fg outModules ``` @@ -45,7 +45,7 @@ stateDiagram Prefix for the output VCF, such as the cohort name. May be alphanumeric with underscores. #### `eval_vcf` -VCF to annotate. In the recommended pipeline, this is generated in [ApplyManualVariantFilter](./amvf). +VCF to annotate. In the recommended pipeline, this is generated in [RefineComplexVariants](./refcv). #### `truth_vcf` VCF to compare against. 
This should contain the same samples as `eval_vcf`. In the recommended pipeline, this is diff --git a/website/docs/modules/evidence_qc.md b/website/docs/modules/evidence_qc.md index a48b3e0a8..6d1a35328 100644 --- a/website/docs/modules/evidence_qc.md +++ b/website/docs/modules/evidence_qc.md @@ -10,13 +10,7 @@ import { Highlight, HighlightOptionalArg } from "../../src/components/highlight. [WDL source code](https://github.com/broadinstitute/gatk-sv/blob/main/wdl/EvidenceQC.wdl) Runs ploidy estimation, dosage scoring, and optionally VCF QC. -The results from this module can be used for QC and batching. - -For large cohorts, this workflow can be run on arbitrary cohort -partitions of up to about 500 samples. Afterward, we recommend -using the results to divide samples into smaller batches (~100-500 samples) -with ~1:1 male:female ratio. Refer to the [Batching](/docs/execution/joint#batching) section -for further guidance on creating batches. +The results from this module can be used for [QC](#preliminary-sample-qc) and [batching](#batching). We also recommend using sex assignments generated from the ploidy estimates and incorporating them into the PED file, with sex = 0 for sex aneuploidies. @@ -77,6 +71,34 @@ stage if necessary. Here are a few of the basic QC checks that we recommend: - Remove samples with autosomal aneuploidies based on the per-batch binned coverage plots of each chromosome. +In the joint calling mode Terra workspace, we provide a Jupyter notebook `SampleQC.ipynb` +for sample QC and filtering. + + +### Batching + +For larger cohorts, samples should be split up into batches of about 100-500 +samples with similar characteristics. We recommend batching based on overall +coverage and dosage score (WGD), both of which are generated in EvidenceQC. +You may also wish to batch samples based on other characteristics that could +impact SV calling, such as mean insert size or PCR status. +An example batching process is outlined below: + +1. Divide the cohort by chromosome X ploidy (less than 2, greater than or equal to 2) + based on copy ratio estimates from EvidenceQC. In this way, males and females will be + batched separately before being merged back together into batches with equal sex balance +2. Partition the samples by median coverage from EvidenceQC, + grouping samples with similar median coverage together +3. Partition the samples further by dosage score (WGD) from + EvidenceQC, grouping samples with similar WGD score together +4. Optionally, partition the samples further by mean insert size if available, + grouping samples with similar mean insert size together +5. Merge corresponding male and female partitions to generate + roughly equal-sized batches of 100-500 samples with near-equal sex balance + +In the joint calling mode Terra workspace, we provide a Jupyter notebook `Batching.ipynb` +for batch creation. + ### Inputs diff --git a/website/docs/modules/filter_batch.md b/website/docs/modules/filter_batch.md index 9645a6223..94d28f281 100644 --- a/website/docs/modules/filter_batch.md +++ b/website/docs/modules/filter_batch.md @@ -11,16 +11,14 @@ import { Highlight, HighlightOptionalArg } from "../../src/components/highlight. Filters poor quality variants and outlier samples. This workflow can be run all at once with the top-level WDL, -or it can be run in three steps to enable tuning of outlier -filtration cutoffs. The three subworkflows are: +or it can be run in two steps to enable tuning of outlier +filtration cutoffs. The two subworkflows are: -1.
[FilterBatchSites](https://github.com/broadinstitute/gatk-sv/blob/main/wdl/FilterBatchSites.wdl): Per-batch variant filtration +1. [FilterBatchSites](https://github.com/broadinstitute/gatk-sv/blob/main/wdl/FilterBatchSites.wdl): Per-batch variant filtration. + Visualize filtered SV counts per sample per type to help choose an IQR cutoff for outlier sample filtering, and preview + outlier samples for a given cutoff. -2. [PlotSVCountsPerSample](https://github.com/broadinstitute/gatk-sv/blob/main/wdl/PlotSVCountsPerSample.wdl): Visualize SV counts per - sample per type to help choose an IQR cutoff for - outlier filtering, and preview outlier samples for a given cutoff - -3. [FilterBatchSamples](https://github.com/broadinstitute/gatk-sv/blob/main/wdl/FilterBatchSamples.wdl): Per-batch outlier sample filtration; +2. [FilterBatchSamples](https://github.com/broadinstitute/gatk-sv/blob/main/wdl/FilterBatchSamples.wdl): Per-batch outlier sample filtration; provide an appropriate [outlier_cutoff_nIQR](#outlier_cutoff_niqr) based on the SV count plots and outlier previews from step 1. Note that not removing high outliers can result in increased diff --git a/website/docs/modules/index.md b/website/docs/modules/index.md index e378cbbc4..df25dd1c3 100644 --- a/website/docs/modules/index.md +++ b/website/docs/modules/index.md @@ -10,7 +10,7 @@ implemented as a single runnable workflow. The following diagram illustrates the overall module ordering: -pipeline_diagram +pipeline_diagram Each module is implemented in the [Workflow Description Language (WDL)](https://openwdl.org). The Terra workspaces come pre-configured with default values for all required parameters and are set up to run the pipeline for most use cases. diff --git a/website/docs/modules/main_vcf_qc.md b/website/docs/modules/main_vcf_qc.md index d362765b4..cfc397698 100644 --- a/website/docs/modules/main_vcf_qc.md +++ b/website/docs/modules/main_vcf_qc.md @@ -41,14 +41,14 @@ increases, and these examples are intended to provide a simple baseline for acce the [Recommendations](#recommendations) section for prescribed quality criteria. :::note -The following plots are of variants passing all filters (i.e. with the `FILTER` status set to `PASS`). This is the +The following plots are of variants passing all filters (i.e. with the `FILTER` status set to `PASS` or `MULTIALLELIC`). This is the default behavior of the QC plots generated in [FilterGenotypes](./fg). When running `MainVcfQc` as a standalone workflow, users may set the [bcftools_preprocessing_options](#optional--bcftools_preprocessing_options) argument to limit plotted variants based on `FILTER` status.
-For example, to limit to `PASS` variants for a VCF generated from [FilterGenotypes](./fg) use: +For example, to limit to `PASS` and `MULTIALLELIC` variants for a VCF generated from [FilterGenotypes](./fg), use: ``` -"bcftools_preprocessing_options": "-i 'FILTER~\"PASS\"" +"bcftools_preprocessing_options": "-i 'FILTER=\"PASS\" || FILTER=\"MULTIALLELIC\"'" ``` ::: diff --git a/website/docs/modules/refine_cpx.md b/website/docs/modules/refine_cpx.md index 6ed5e0f41..b730744ff 100644 --- a/website/docs/modules/refine_cpx.md +++ b/website/docs/modules/refine_cpx.md @@ -24,14 +24,14 @@ stateDiagram cvcf: CleanVcf refcv: RefineComplexVariants - amvf: ApplyManualVariantFilter + svc: SVConcordance cvcf --> refcv - refcv --> amvf + refcv --> svc class refcv thisModule class cvcf inModules - class amvf outModules + class svc outModules ``` ### Inputs From fb6720a8bbc41f00ebea6925e45dac293ec68564 Mon Sep 17 00:00:00 2001 From: Mark Walker Date: Fri, 1 Nov 2024 09:59:50 -0400 Subject: [PATCH 2/2] Add links to GATK forums and joint calling workspace (#741) --- website/docs/execution/joint.md | 2 +- website/docs/troubleshooting/faq.md | 1 + website/docs/troubleshooting/support.md | 10 ++++++++++ website/src/components/HomepageFeatures/index.js | 2 +- 4 files changed, 13 insertions(+), 2 deletions(-) create mode 100644 website/docs/troubleshooting/support.md diff --git a/website/docs/execution/joint.md b/website/docs/execution/joint.md index fdb0378a1..4105e4edc 100644 --- a/website/docs/execution/joint.md +++ b/website/docs/execution/joint.md @@ -6,7 +6,7 @@ slug: joint --- ## Terra workspace -Users should clone the Terra joint calling workspace (TODO) +Users should clone the [Terra joint calling workspace](https://app.terra.bio/#workspaces/broad-firecloud-dsde-methods/GATK-Structural-Variants-Joint-Calling) which is configured with a demo sample set. Refer to the following sections for instructions on how to run the pipeline on your data using this workspace. diff --git a/website/docs/troubleshooting/faq.md b/website/docs/troubleshooting/faq.md index 439cbf391..b84233616 100644 --- a/website/docs/troubleshooting/faq.md +++ b/website/docs/troubleshooting/faq.md @@ -1,6 +1,7 @@ --- title: FAQ slug: faq +sidebar_position: 0 --- Please consult the following resources for additional troubleshooting guides: diff --git a/website/docs/troubleshooting/support.md b/website/docs/troubleshooting/support.md new file mode 100644 index 000000000..fb85ce632 --- /dev/null +++ b/website/docs/troubleshooting/support.md @@ -0,0 +1,10 @@ +--- +title: User support +slug: support +sidebar_position: 1 +--- + +For guidance on troubleshooting error messages, please review our [FAQ](./faq). + +If you still cannot resolve your issue, reach out to our support team on the +[GATK forums](https://gatk.broadinstitute.org/hc/en-us/community/topics). diff --git a/website/src/components/HomepageFeatures/index.js b/website/src/components/HomepageFeatures/index.js index e5ce0b381..bfb9eeaff 100644 --- a/website/src/components/HomepageFeatures/index.js +++ b/website/src/components/HomepageFeatures/index.js @@ -18,7 +18,7 @@ const accessibleFeatures = [ ), buttons: [ { - buttonLink: 'https://app.terra.bio', + buttonLink: 'https://app.terra.bio/#workspaces/broad-firecloud-dsde-methods/GATK-Structural-Variants-Joint-Calling', buttonText: 'Joint Calling Workspace' }, {