From c635e1df9d464cbe9bcf87e51c991afbb86795d5 Mon Sep 17 00:00:00 2001 From: Miguel Covarrubias Date: Tue, 19 Nov 2024 17:03:50 -0500 Subject: [PATCH 1/2] Echo sample_chromosome_ploidy schema updates for AoU RWB [VS-1500] --- scripts/variantstore/wdl/GvsAssignIds.wdl | 6 +++++- .../hellbender/tools/gvs/common/SchemaUtils.java | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/scripts/variantstore/wdl/GvsAssignIds.wdl b/scripts/variantstore/wdl/GvsAssignIds.wdl index 3668af140a8..b34e3d54c07 100644 --- a/scripts/variantstore/wdl/GvsAssignIds.wdl +++ b/scripts/variantstore/wdl/GvsAssignIds.wdl @@ -28,7 +28,11 @@ workflow GvsAssignIds { String vcf_header_lines_schema_json = '[{"name":"vcf_header_lines_hash","type":"STRING","mode":"REQUIRED"}, {"name":"vcf_header_lines","type":"STRING","mode":"REQUIRED"},{"name":"is_expected_unique","type":"BOOLEAN","mode":"REQUIRED"}]' String sample_vcf_header_schema_json = '[{"name": "sample_id","type": "INTEGER","mode": "REQUIRED"}, {"name":"vcf_header_lines_hash","type":"STRING","mode":"REQUIRED"}]' String sample_load_status_schema_json = '[{"name": "sample_id","type": "INTEGER","mode": "REQUIRED"},{"name":"status","type":"STRING","mode":"REQUIRED"}, {"name":"event_timestamp","type":"TIMESTAMP","mode":"REQUIRED"}]' - String sample_chromosome_ploidy_schema_json = '[{"name": "sample_id","type": "INTEGER","mode": "REQUIRED"},{"name": "chromosome","type": "INTEGER","mode": "REQUIRED"},{"name": "ploidy","type": "INTEGER","mode": "REQUIRED"}]' + # The schema below for the `sample_chromosome_ploidy` table matches that in the AoU `echo` dataset, which differs from + # the standard `sample_chromosome_ploidy` schema by the addition of the `genotype` column. At the time of this writing + # it is not expected that any additional samples will be loaded into the `echo` dataset, but if there are additional + # samples then this code will match the actual `echo` schema. 
+ String sample_chromosome_ploidy_schema_json = '[{"name": "sample_id","type": "INTEGER","mode": "REQUIRED"},{"name": "chromosome","type": "INTEGER","mode": "REQUIRED"},{"name":"genotype","type":"STRING","mode":"NULLABLE"},{"name": "ploidy","type": "INTEGER","mode": "REQUIRED"}]' if (!defined(git_hash) || !defined(cloud_sdk_docker)) { call Utils.GetToolVersions { diff --git a/src/main/java/org/broadinstitute/hellbender/tools/gvs/common/SchemaUtils.java b/src/main/java/org/broadinstitute/hellbender/tools/gvs/common/SchemaUtils.java index 4f346fb7e85..fd06c77f978 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/gvs/common/SchemaUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/gvs/common/SchemaUtils.java @@ -79,7 +79,7 @@ public class SchemaUtils { CALL_PGT, CALL_PID, CALL_PS); - public static final List<String> SAMPLE_PLOIDY_FIELDS = Arrays.asList(CHROMOSOME, SAMPLE_ID, PLOIDY); + public static final List<String> SAMPLE_PLOIDY_FIELDS = Arrays.asList(CHROMOSOME, SAMPLE_ID, GENOTYPE, PLOIDY); public static final List<String> EXTRACT_REF_FIELDS = Arrays.asList(LOCATION_FIELD_NAME, SAMPLE_ID_FIELD_NAME, LENGTH_FIELD_NAME, STATE_FIELD_NAME); From 573e42063d107a5abbc0b69198c865043d0a8e4e Mon Sep 17 00:00:00 2001 From: Miguel Covarrubias Date: Tue, 19 Nov 2024 19:05:07 -0500 Subject: [PATCH 2/2] wip --- scripts/variantstore/wdl/GvsUtils.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/variantstore/wdl/GvsUtils.wdl b/scripts/variantstore/wdl/GvsUtils.wdl index 09db3847603..55d4b2645ce 100644 --- a/scripts/variantstore/wdl/GvsUtils.wdl +++ b/scripts/variantstore/wdl/GvsUtils.wdl @@ -74,7 +74,7 @@ task GetToolVersions { String cloud_sdk_slim_docker = "gcr.io/google.com/cloudsdktool/cloud-sdk:435.0.0-slim" String variants_docker = "us-central1-docker.pkg.dev/broad-dsde-methods/gvs/variants:2024-11-05-alpine-a55d2253d280" String variants_nirvana_docker = "us.gcr.io/broad-dsde-methods/variantstore:nirvana_2022_10_19" - String gatk_docker = 
"us-central1-docker.pkg.dev/broad-dsde-methods/gvs/gatk:2024-10-24-gatkbase-b29b46ab0443" + String gatk_docker = "us-central1-docker.pkg.dev/broad-dsde-methods/gvs/gatk:2024-11-19-gatkbase-b01d93162302" String real_time_genomics_docker = "docker.io/realtimegenomics/rtg-tools:latest" String gotc_imputation_docker = "us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.5-1.10.2-0.1.16-1649948623" String plink_docker = "us-central1-docker.pkg.dev/broad-dsde-methods/gvs/plink2:2024-04-23-slim-a0a65f52cc0e"