From feaaf6d59b8ccc128f233a983e29d538e7f28c86 Mon Sep 17 00:00:00 2001
From: gatk-sv-bot <101641599+gatk-sv-bot@users.noreply.github.com>
Date: Tue, 14 Jan 2025 15:59:33 +0000
Subject: [PATCH 1/3] Update docker images list, triggered by 88dbd052

---
 inputs/values/dockers.json | 6 +++---
 inputs/values/dockers_azure.json | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/inputs/values/dockers.json b/inputs/values/dockers.json
index 0bbb990d4..cd8eb2d27 100644
--- a/inputs/values/dockers.json
+++ b/inputs/values/dockers.json
@@ -12,8 +12,8 @@
   "samtools_cloud_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/samtools-cloud:2024-10-25-v0.29-beta-5ea22a52",
   "sv_base_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/sv-base:2024-10-25-v0.29-beta-5ea22a52",
   "sv_base_mini_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/sv-base-mini:2024-10-25-v0.29-beta-5ea22a52",
-  "sv_pipeline_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/sv-pipeline:2024-11-15-v1.0-488d7cb0",
-  "sv_pipeline_qc_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/sv-pipeline:2024-11-15-v1.0-488d7cb0",
+  "sv_pipeline_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/sv-pipeline:2025-01-14-v1.0.1-88dbd052",
+  "sv_pipeline_qc_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/sv-pipeline:2025-01-14-v1.0.1-88dbd052",
   "wham_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/wham:2024-10-25-v0.29-beta-5ea22a52",
   "igv_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/igv:mw-xz-fixes-2-b1be6a9",
   "duphold_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/duphold:mw-xz-fixes-2-b1be6a9",
@@ -28,5 +28,5 @@
   "sv_utils_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/sv-utils:2025-01-06-v1.0.1-e902bf4e",
   "gq_recalibrator_docker": "us.gcr.io/broad-dsde-methods/markw/gatk:mw-tb-form-sv-filter-training-data-899360a",
   "str": "us.gcr.io/broad-dsde-methods/gatk-sv/str:2023-05-23-v0.27.3-beta-e537bdd6",
-  "denovo": "us.gcr.io/broad-dsde-methods/gatk-sv/denovo:2024-11-15-v1.0-488d7cb0"
+  "denovo": "us.gcr.io/broad-dsde-methods/gatk-sv/denovo:2025-01-14-v1.0.1-88dbd052"
 }
\ No newline at end of file
diff --git a/inputs/values/dockers_azure.json b/inputs/values/dockers_azure.json
index fd04b6f50..d46600acb 100644
--- a/inputs/values/dockers_azure.json
+++ b/inputs/values/dockers_azure.json
@@ -12,8 +12,8 @@
   "samtools_cloud_docker": "vahid.azurecr.io/gatk-sv/samtools-cloud:2024-10-25-v0.29-beta-5ea22a52",
   "sv_base_docker": "vahid.azurecr.io/gatk-sv/sv-base:2024-10-25-v0.29-beta-5ea22a52",
   "sv_base_mini_docker": "vahid.azurecr.io/gatk-sv/sv-base-mini:2024-10-25-v0.29-beta-5ea22a52",
-  "sv_pipeline_docker": "vahid.azurecr.io/gatk-sv/sv-pipeline:2024-11-15-v1.0-488d7cb0",
-  "sv_pipeline_qc_docker": "vahid.azurecr.io/gatk-sv/sv-pipeline:2024-11-15-v1.0-488d7cb0",
+  "sv_pipeline_docker": "vahid.azurecr.io/gatk-sv/sv-pipeline:2025-01-14-v1.0.1-88dbd052",
+  "sv_pipeline_qc_docker": "vahid.azurecr.io/gatk-sv/sv-pipeline:2025-01-14-v1.0.1-88dbd052",
   "wham_docker": "vahid.azurecr.io/gatk-sv/wham:2024-10-25-v0.29-beta-5ea22a52",
   "igv_docker": "vahid.azurecr.io/gatk-sv/igv:mw-xz-fixes-2-b1be6a9",
   "duphold_docker": "vahid.azurecr.io/gatk-sv/duphold:mw-xz-fixes-2-b1be6a9",
@@ -28,5 +28,5 @@
   "sv_utils_docker": "vahid.azurecr.io/gatk-sv/sv-utils:2025-01-06-v1.0.1-e902bf4e",
   "gq_recalibrator_docker": "vahid.azurecr.io/markw/gatk:mw-tb-form-sv-filter-training-data-899360a",
   "str": "vahid.azurecr.io/gatk-sv/str:2023-05-23-v0.27.3-beta-e537bdd6",
-  "denovo": "vahid.azurecr.io/gatk-sv/denovo:2024-11-15-v1.0-488d7cb0"
+  "denovo": "vahid.azurecr.io/gatk-sv/denovo:2025-01-14-v1.0.1-88dbd052"
 }
\ No newline at end of file

From 55e74d7c538dd8accb8363d887fed2843ea1636e Mon Sep 17 00:00:00 2001
From: Vahid
Date: Tue, 21 Jan 2025 09:55:01 -0500
Subject: [PATCH 2/3] Update gatk docker on azure (#770)

---
 inputs/values/dockers_azure.json | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/inputs/values/dockers_azure.json b/inputs/values/dockers_azure.json
index d46600acb..8ccc5a005 100644
--- a/inputs/values/dockers_azure.json
+++ b/inputs/values/dockers_azure.json
@@ -2,7 +2,7 @@
   "name": "dockers",
   "cnmops_docker": "vahid.azurecr.io/gatk-sv/cnmops:2024-11-08-v1.0-62adb329",
   "condense_counts_docker": "vahid.azurecr.io/tsharpe/gatk:4.2.6.1-57-g9e03432",
-  "gatk_docker": "vahid.azurecr.io/gatk-sv/gatk:2024-07-02-4.6.0.0-1-g4af2b49e9-NIGHTLY-SNAPSHOT",
+  "gatk_docker": "vahid.azurecr.io/gatk-sv/gatk:2024-12-05-4.6.1.0-6-gfc248dfc1-NIGHTLY-SNAPSHOT",
   "gatk_docker_pesr_override": "vahid.azurecr.io/tsharpe/gatk:4.2.6.1-57-g9e03432",
   "genomes_in_the_cloud_docker": "vahid.azurecr.io/genomes-in-the-cloud:2.3.2-1510681135",
   "linux_docker": "vahid.azurecr.io/google/ubuntu1804",
@@ -29,4 +29,4 @@
   "gq_recalibrator_docker": "vahid.azurecr.io/markw/gatk:mw-tb-form-sv-filter-training-data-899360a",
   "str": "vahid.azurecr.io/gatk-sv/str:2023-05-23-v0.27.3-beta-e537bdd6",
   "denovo": "vahid.azurecr.io/gatk-sv/denovo:2025-01-14-v1.0.1-88dbd052"
-}
\ No newline at end of file
+}

From 53419e1aff3792f066591963a28be8ba49649c61 Mon Sep 17 00:00:00 2001
From: Mark Walker
Date: Tue, 21 Jan 2025 10:57:03 -0500
Subject: [PATCH 3/3] Remove internal workflows and scripts (#742)

---
 dockerfiles/duphold/Dockerfile | 16 -
 dockerfiles/igv/Dockerfile | 80 -
 dockerfiles/igv/MakeRDtest.py | 263 ---
 dockerfiles/igv/arial.ttf | Bin 311636 -> 0 bytes
 dockerfiles/igv/igv.py | 12 -
 dockerfiles/igv/makeigv_cram.py | 87 -
 dockerfiles/igv/makeigvpesr_cram.py | 125 --
 dockerfiles/igv/makeigvpesr_trio.py | 120 --
 dockerfiles/igv/makeigvsplit_cram.py | 87 -
 dockerfiles/igv/prefs.properties | 15 -
 dockerfiles/pangenie/Dockerfile | 60 -
 dockerfiles/rdpesr/Dockerfile | 89 -
 dockerfiles/rdpesr/Modify_vcf_by_steps.py | 52 -
 dockerfiles/rdpesr/RdTestV2.R | 1477 -----------------
 dockerfiles/rdpesr/add_GC_anno_to_bed.R | 51 -
 dockerfiles/rdpesr/add_RD_to_SVs.py | 75 -
 dockerfiles/rdpesr/add_SR_PE_to_PB_INS.V2.py | 147 --
 .../rdpesr/add_SR_PE_to_breakpoints.py | 75 -
 dockerfiles/rdpesr/bincov_to_normCov.R | 18 -
 dockerfiles/rdpesr/calcu_inheri_stat.py | 108 --
 .../rdpesr/install_deprecated_R_package.sh | 11 -
 dockerfiles/rdpesr/integrate_annotations.R | 105 --
 .../rdpesr/integrate_annotations_wo_duphold.R | 80 -
 .../rdpesr/modify_bed_for_PE_SR_RD_labeling.R | 35 -
 dockerfiles/rdpesr/modify_pe_metrics.R | 17 -
 .../ApplyManualVariantFilter.json.tmpl | 7 -
 .../Module09VisualizeSingleSample.json.tmpl | 40 -
 inputs/templates/test/Mosaic/Mosaic.json.tmpl | 16 -
 .../RenameVcfSamples.json.tmpl | 8 -
 .../SetSampleIdLegacy.json.tmpl | 8 -
 .../SubsetVcfBySamples.json.tmpl | 7 -
 inputs/values/dockers.json | 3 -
 scripts/cromwell/analyze_monitoring_logs.py | 320 ----
 scripts/cromwell/analyze_monitoring_logs2.py | 313 ----
 .../cromwell/analyze_resource_acquisition.py | 530 ------
 scripts/cromwell/copy_cromwell_results.sh | 31 -
 scripts/cromwell/copy_outputs.py | 71 -
 .../cromwell/cromwell_monitoring_script.sh | 290 ----
 .../cromwell/cromwell_monitoring_script2.sh | 254 ---
 scripts/cromwell/download_monitoring_logs.py | 117 --
 scripts/cromwell/generate_inputs.py | 381 -----
.../cromwell/get_cromwell_resource_usage.sh | 282 ---- .../cromwell/get_cromwell_resource_usage2.sh | 366 ---- scripts/cromwell/get_inputs_outputs.py | 77 - scripts/cromwell/get_output_paths.py | 263 --- scripts/cromwell/launch_wdl.sh | 54 - scripts/cromwell/watch_cromshell.sh | 167 -- scripts/inputs/calibrate_qc_metrics.py | 59 - scripts/inputs/convert_sample_ids.py | 102 ++ scripts/inputs/create_test_batch.py | 132 -- .../get_rename_benchmark_samples_map.py | 125 -- scripts/test/check_gs_urls.py | 77 - scripts/test/compare_files.py | 274 --- scripts/test/metadata.py | 129 -- wdl/AnnoRdPeSr.wdl | 125 -- wdl/AnnotateCleanVcfWithFilteringResults.wdl | 499 ------ wdl/AnnotateILFeatures.wdl | 898 ---------- wdl/ApplyManualVariantFilter.wdl | 85 - wdl/CalcAF.wdl | 180 -- wdl/CombineRegeno.wdl | 58 - wdl/ConcatTextFiles.wdl | 43 - wdl/CramToBam.ReviseBase.wdl | 247 --- wdl/Duphold.wdl | 232 --- wdl/FilterBatchQc.wdl | 148 -- wdl/FilterCleanupQualRecalibration.wdl | 241 --- wdl/FilterOutlierSamplesPostMinGQ.wdl | 404 ----- wdl/GatherSampleEvidence.wdl | 25 +- wdl/Genotype_3.wdl | 71 - wdl/GetSampleID.wdl | 159 -- wdl/IGVGeneratePlotsAllSamples.wdl | 184 -- wdl/IGVGeneratePlotsWholeGenome.wdl | 67 - wdl/IGVTrioPlots.wdl | 83 - wdl/IGVTrioPlotsAllSamples.wdl | 147 -- wdl/MinGQRocOpt.wdl | 226 --- ...Module07FilterCleanupQualRecalibration.wdl | 137 -- wdl/Module07MinGQ.wdl | 924 ----------- wdl/Module07MinGQStep2MergePCRStatus.wdl | 303 ---- wdl/Module07XfBatchEffect.wdl | 689 -------- wdl/Module09VisualizeSingleSample.wdl | 135 -- wdl/Module09VisualizeTrio.wdl | 143 -- wdl/Module10AnnotateILFeatures.wdl | 238 --- wdl/Module10AnnotateRdPeSr.wdl | 70 - wdl/Module10Benchmark.wdl | 550 ------ wdl/Mosaic.wdl | 193 --- wdl/MosaicDepth.wdl | 169 -- wdl/MosaicPesrPart1.wdl | 135 -- wdl/MosaicPesrPart2.wdl | 144 -- wdl/PatchSRBothsidePass.wdl | 133 -- wdl/PatchSRBothsidePassScatter.wdl | 54 - wdl/PreRFCohort.wdl | 186 --- wdl/RdPeSrAnno.wdl | 184 -- wdl/RdTestVisualization.wdl | 113 -- wdl/RenameVcfSamples.wdl | 90 - wdl/ReviseSVtypeINStoMEI.wdl | 60 - wdl/ReviseSVtypeINStoMEIperContig.wdl | 112 -- wdl/SetSampleIdLegacy.wdl | 143 -- wdl/SubsetVcfBySamples.wdl | 35 - wdl/TasksBenchmark.wdl | 339 ---- wdl/Vapor.wdl | 126 +- wdl/XfBatchEffect.wdl | 674 -------- wdl/batch_effect_helper.wdl | 205 --- wdl/prune_add_af.wdl | 120 -- wdl/qcstructs.wdl | 10 - website/docs/gs/input_files.md | 18 +- 104 files changed, 235 insertions(+), 18027 deletions(-) delete mode 100644 dockerfiles/duphold/Dockerfile delete mode 100644 dockerfiles/igv/Dockerfile delete mode 100755 dockerfiles/igv/MakeRDtest.py delete mode 100755 dockerfiles/igv/arial.ttf delete mode 100755 dockerfiles/igv/igv.py delete mode 100755 dockerfiles/igv/makeigv_cram.py delete mode 100755 dockerfiles/igv/makeigvpesr_cram.py delete mode 100755 dockerfiles/igv/makeigvpesr_trio.py delete mode 100755 dockerfiles/igv/makeigvsplit_cram.py delete mode 100755 dockerfiles/igv/prefs.properties delete mode 100644 dockerfiles/pangenie/Dockerfile delete mode 100644 dockerfiles/rdpesr/Dockerfile delete mode 100755 dockerfiles/rdpesr/Modify_vcf_by_steps.py delete mode 100755 dockerfiles/rdpesr/RdTestV2.R delete mode 100755 dockerfiles/rdpesr/add_GC_anno_to_bed.R delete mode 100755 dockerfiles/rdpesr/add_RD_to_SVs.py delete mode 100755 dockerfiles/rdpesr/add_SR_PE_to_PB_INS.V2.py delete mode 100755 dockerfiles/rdpesr/add_SR_PE_to_breakpoints.py delete mode 100755 dockerfiles/rdpesr/bincov_to_normCov.R delete mode 100755 dockerfiles/rdpesr/calcu_inheri_stat.py delete mode 
100644 dockerfiles/rdpesr/install_deprecated_R_package.sh delete mode 100644 dockerfiles/rdpesr/integrate_annotations.R delete mode 100644 dockerfiles/rdpesr/integrate_annotations_wo_duphold.R delete mode 100755 dockerfiles/rdpesr/modify_bed_for_PE_SR_RD_labeling.R delete mode 100755 dockerfiles/rdpesr/modify_pe_metrics.R delete mode 100644 inputs/templates/test/ApplyManualVariantFilter/ApplyManualVariantFilter.json.tmpl delete mode 100644 inputs/templates/test/Module09/Module09VisualizeSingleSample.json.tmpl delete mode 100644 inputs/templates/test/Mosaic/Mosaic.json.tmpl delete mode 100644 inputs/templates/test/RenameVcfSamples/RenameVcfSamples.json.tmpl delete mode 100644 inputs/templates/test/SetSampleIdLegacy/SetSampleIdLegacy.json.tmpl delete mode 100644 inputs/templates/test/SubsetVcfBySamples/SubsetVcfBySamples.json.tmpl delete mode 100644 scripts/cromwell/analyze_monitoring_logs.py delete mode 100644 scripts/cromwell/analyze_monitoring_logs2.py delete mode 100644 scripts/cromwell/analyze_resource_acquisition.py delete mode 100755 scripts/cromwell/copy_cromwell_results.sh delete mode 100644 scripts/cromwell/copy_outputs.py delete mode 100755 scripts/cromwell/cromwell_monitoring_script.sh delete mode 100755 scripts/cromwell/cromwell_monitoring_script2.sh delete mode 100644 scripts/cromwell/download_monitoring_logs.py delete mode 100644 scripts/cromwell/generate_inputs.py delete mode 100755 scripts/cromwell/get_cromwell_resource_usage.sh delete mode 100755 scripts/cromwell/get_cromwell_resource_usage2.sh delete mode 100644 scripts/cromwell/get_inputs_outputs.py delete mode 100644 scripts/cromwell/get_output_paths.py delete mode 100755 scripts/cromwell/launch_wdl.sh delete mode 100755 scripts/cromwell/watch_cromshell.sh delete mode 100644 scripts/inputs/calibrate_qc_metrics.py create mode 100644 scripts/inputs/convert_sample_ids.py delete mode 100644 scripts/inputs/create_test_batch.py delete mode 100755 scripts/inputs/get_rename_benchmark_samples_map.py delete mode 100644 scripts/test/check_gs_urls.py delete mode 100644 scripts/test/compare_files.py delete mode 100644 scripts/test/metadata.py delete mode 100644 wdl/AnnoRdPeSr.wdl delete mode 100644 wdl/AnnotateCleanVcfWithFilteringResults.wdl delete mode 100644 wdl/AnnotateILFeatures.wdl delete mode 100644 wdl/ApplyManualVariantFilter.wdl delete mode 100644 wdl/CalcAF.wdl delete mode 100644 wdl/CombineRegeno.wdl delete mode 100644 wdl/ConcatTextFiles.wdl delete mode 100644 wdl/CramToBam.ReviseBase.wdl delete mode 100644 wdl/Duphold.wdl delete mode 100644 wdl/FilterBatchQc.wdl delete mode 100644 wdl/FilterCleanupQualRecalibration.wdl delete mode 100644 wdl/FilterOutlierSamplesPostMinGQ.wdl delete mode 100644 wdl/Genotype_3.wdl delete mode 100644 wdl/GetSampleID.wdl delete mode 100644 wdl/IGVGeneratePlotsAllSamples.wdl delete mode 100644 wdl/IGVGeneratePlotsWholeGenome.wdl delete mode 100755 wdl/IGVTrioPlots.wdl delete mode 100755 wdl/IGVTrioPlotsAllSamples.wdl delete mode 100644 wdl/MinGQRocOpt.wdl delete mode 100644 wdl/Module07FilterCleanupQualRecalibration.wdl delete mode 100644 wdl/Module07MinGQ.wdl delete mode 100644 wdl/Module07MinGQStep2MergePCRStatus.wdl delete mode 100644 wdl/Module07XfBatchEffect.wdl delete mode 100644 wdl/Module09VisualizeSingleSample.wdl delete mode 100755 wdl/Module09VisualizeTrio.wdl delete mode 100644 wdl/Module10AnnotateILFeatures.wdl delete mode 100644 wdl/Module10AnnotateRdPeSr.wdl delete mode 100644 wdl/Module10Benchmark.wdl delete mode 100644 wdl/Mosaic.wdl delete mode 100644 wdl/MosaicDepth.wdl 
delete mode 100644 wdl/MosaicPesrPart1.wdl delete mode 100644 wdl/MosaicPesrPart2.wdl delete mode 100644 wdl/PatchSRBothsidePass.wdl delete mode 100644 wdl/PatchSRBothsidePassScatter.wdl delete mode 100644 wdl/PreRFCohort.wdl delete mode 100644 wdl/RdPeSrAnno.wdl delete mode 100755 wdl/RdTestVisualization.wdl delete mode 100644 wdl/RenameVcfSamples.wdl delete mode 100644 wdl/ReviseSVtypeINStoMEI.wdl delete mode 100644 wdl/ReviseSVtypeINStoMEIperContig.wdl delete mode 100644 wdl/SetSampleIdLegacy.wdl delete mode 100644 wdl/SubsetVcfBySamples.wdl delete mode 100644 wdl/TasksBenchmark.wdl delete mode 100644 wdl/XfBatchEffect.wdl delete mode 100644 wdl/batch_effect_helper.wdl delete mode 100644 wdl/prune_add_af.wdl delete mode 100644 wdl/qcstructs.wdl diff --git a/dockerfiles/duphold/Dockerfile b/dockerfiles/duphold/Dockerfile deleted file mode 100644 index 41b3e73a0..000000000 --- a/dockerfiles/duphold/Dockerfile +++ /dev/null @@ -1,16 +0,0 @@ -FROM brentp/musl-hts-nim:latest -RUN apk update && apk upgrade && apk add bash && /bin/bash -RUN cd / && \ - git clone -b master --depth 1 git://github.com/brentp/duphold.git && \ - cd duphold && \ - nimble install -y && \ - nim c -d:release -d:danger -o:/usr/bin/duphold --passC:-flto src/duphold && \ - rm -rf /duphold && duphold --help -ENV GATK_JAR="/opt/gatk.jar" -RUN apk add git git-lfs openjdk8 && \ - cd / && git clone https://github.com/broadinstitute/gatk.git && \ - cd gatk && git lfs install && \ - ./gradlew localJar && \ - mv $(readlink -f build/libs/gatk.jar) ${GATK_JAR} && \ - cd / && \ - java -jar ${GATK_JAR} diff --git a/dockerfiles/igv/Dockerfile b/dockerfiles/igv/Dockerfile deleted file mode 100644 index e8b16d7bd..000000000 --- a/dockerfiles/igv/Dockerfile +++ /dev/null @@ -1,80 +0,0 @@ -# This is the barebones image for the GATK SV pipeline that includes only -# some basic bioinformatics utilities. - -# Start with linux base -FROM ubuntu:18.04 - -# Set environment variables. 
-ENV HOME=/root - - - -#### Basic image utilities -ENV DEBIAN_FRONTEND noninteractive -RUN apt-get update && apt-get install -y --no-install-recommends apt-utils -RUN apt-get update && apt-get install -y r-base - -RUN apt-get update --fix-missing && \ - apt-get upgrade -y && \ - apt-get install -y \ - bcftools \ - bedtools \ - openjdk-8-jdk \ - xvfb \ - zip \ - python3-pip \ - curl \ - git \ - samtools \ - tabix \ - vcftools \ - wget \ - zlib1g-dev && \ - rm -rf /var/lib/apt/lists/* && \ - apt-get -y clean && \ - apt-get -y autoremove && \ - apt-get -y autoclean - -ARG CLOUD_SDK_VERSION=272.0.0 -ENV CLOUD_SDK_VERSION=$CLOUD_SDK_VERSION - - -ARG INSTALL_COMPONENTS -RUN apt-get update -qqy && apt-get install -qqy \ - curl \ - gcc \ - python3-dev \ - python3-setuptools \ - apt-transport-https \ - lsb-release \ - openssh-client \ - git \ - gnupg \ - && pip3 install -U crcmod && \ - pip3 install numpy && \ - pip3 install image && \ - export CLOUD_SDK_REPO="cloud-sdk-$(lsb_release -c -s)" && \ - echo "deb https://packages.cloud.google.com/apt $CLOUD_SDK_REPO main" > /etc/apt/sources.list.d/google-cloud-sdk.list && \ - curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - && \ - apt-get update && apt-get install -y google-cloud-sdk=${CLOUD_SDK_VERSION}-0 $INSTALL_COMPONENTS && \ - gcloud config set core/disable_usage_reporting true && \ - gcloud config set component_manager/disable_update_check true && \ - gcloud config set metrics/environment github_docker_image && \ - gcloud --version - - -RUN wget http://data.broadinstitute.org/igv/projects/downloads/2.4/IGV_2.4.14.zip && \ - unzip IGV_2.4.14.zip - - -VOLUME ["/root/.config"] -COPY MakeRDtest.py /src/MakeRDtest.py -COPY makeigvpesr_cram.py /src/makeigvpesr_cram.py -COPY makeigvsplit_cram.py /src/makeigvsplit_cram.py -COPY makeigv_cram.py /src/makeigv_cram.py -COPY makeigvpesr_trio.py /src/makeigvpesr_trio.py -COPY prefs.properties /root/igv/prefs.properties -COPY arial.ttf /usr/src/app/arial.ttf - -# Define default command. 
-CMD ["bash"] \ No newline at end of file diff --git a/dockerfiles/igv/MakeRDtest.py b/dockerfiles/igv/MakeRDtest.py deleted file mode 100755 index 2506e8948..000000000 --- a/dockerfiles/igv/MakeRDtest.py +++ /dev/null @@ -1,263 +0,0 @@ -import os -import numpy as np -from PIL import Image -from PIL import ImageFont -from PIL import ImageDraw -import argparse -# Image helper function -# stack two or more images vertically - - -def vstack(lst, outname): - # given a list of image files, stack them vertically then save as - list_im = lst # list of image files - imgs = [Image.open(i) for i in list_im] - # pick the image which is the smallest, and resize the others to match it (can be arbitrary image shape here) - min_shape = sorted([(np.sum(i.size), i.size) for i in imgs])[0][1][0] - imgs_comb = np.vstack([np.asarray( - i.resize((min_shape, int(i.size[1] / i.size[0] * min_shape)))) for i in imgs]) - # save that beautiful picture - imgs_comb = Image.fromarray(imgs_comb, "RGB") - imgs_comb.save(outname) -# combine two images side by side - - -def hstack(f1, f2, name): - # given two images, put them side by side, then save to name - list_im = [f1, f2] - imgs = [Image.open(i) for i in list_im] - # pick the image which is the smallest, and resize the others to match it (can be arbitrary image shape here) - min_shape = sorted([(np.sum(i.size), i.size) for i in imgs])[0][1] -# print(min_shape) - imgs_comb = np.hstack([np.asarray(i.resize((min_shape))) for i in imgs]) - - # save that beautiful picture - imgs_comb = Image.fromarray(imgs_comb, "RGB") - imgs_comb.save(name) - - -def words(STR1, STR2, outfile, n=100): - font = ImageFont.truetype("arial.ttf", 70) - img = Image.new("RGB", (1800, 300), (255, 255, 255)) - draw = ImageDraw.Draw(img) - draw.text((n, 10), STR1, (0, 0, 0), font=font) - draw = ImageDraw.Draw(img) - draw.text((n, 150), STR2, (0, 0, 0), font=font) - draw = ImageDraw.Draw(img) - img.save(outfile) - -########## -# class Rdplotprefix(): -# def __init__(self,variantfile,GetVariantFunc=GetVariants,pedfile,prefixfile,pesrdir,rddir): -# self.variants=GetVariantFunc(inputfile,pedfile,prefixfile).variants - - -class Variant(): - def __init__(self, chr, start, end, name, type, samples, varname, prefix): - self.chr = chr - self.coord = str(chr) + ":" + str(start) + "-" + str(end) - self.start = start - self.end = end - self.name = name - self.type = type - self.prefix = prefix - self.varname = varname - self.sample = samples - self.samples = samples.split(",") - - def pesrplotname(self, dir): - if os.path.isfile(dir + self.varname + ".png"): - return dir + self.varname + ".png" - elif os.path.isfile(dir + self.varname + ".left.png") and os.path.isfile(dir + self.varname + ".right.png"): - hstack(dir + self.varname + ".left.png", dir + - self.varname + ".right.png", dir + self.varname + ".png") - return dir + self.varname + ".png" - else: - raise Exception(dir + self.varname + ".png" + - " PESR files not found") - - def rdplotname(self, dir, maxcutoff=float("inf")): - if int(self.end) - int(self.start) > maxcutoff: - medium = (int(self.end) + int(self.start)) / 2 - newstart = str(round(medium - maxcutoff / 2)) - newend = str(round(medium + maxcutoff / 2)) - else: - newstart = self.start - newend = self.end - if os.path.isfile(dir + self.chr + "_" + newstart + "_" + newend + "_" + self.samples[0] + "_" + self.name + "_" + self.prefix + ".jpg"): - return dir + self.chr + "_" + newstart + "_" + newend + "_" + self.samples[0] + "_" + self.name + "_" + self.prefix + ".jpg" - elif os.path.isfile(dir 
+ self.chr + "_" + newstart + "_" + newend + "_" + self.samples[0] + "_" + self.name + "_" + self.prefix + ".jpg"): - return dir + self.chr + "_" + newstart + "_" + newend + "_" + self.samples[0] + "_" + self.name + "_" + self.prefix + ".jpg" - else: - raise Exception(dir + self.chr + "_" + newstart + "_" + newend + "_" + - self.samples[0] + "_" + self.name + "_" + self.prefix + ".jpg" + " Rdplot not found") - - def makeplot(self, pedir, rddir, outdir, flank, build="hg38"): - if self.type != "INS": - if int(self.end) - int(self.start) < 2000: - STR2 = self.varname + " " + \ - str(int(self.end) - int(self.start)) + 'bp' - else: - STR2 = self.varname + " " + \ - str(int((int(self.end) - int(self.start)) / 1000)) + 'kb' - else: - STR2 = self.varname - pesrplot = self.pesrplotname(pedir) - if self.type == "DUP" or self.type == "DEL": - rdplot = self.rdplotname(rddir, flank) - img = Image.open(rdplot) # rd plot - # crop out original RD plot annotations - img2 = img.crop((0, 230, img.size[0], img.size[1])) - img2.save("croprd.jpg") - # get new annotation - STR1 = self.chr + ":" + \ - '{0:,}'.format(int(self.start)) + '-' + \ - '{0:,}'.format(int(self.end)) + " (+" + build + ")" - outfile = 'info.jpg' - words(STR1, STR2, outfile, 100) # new Rd plot - vstack(['info.jpg', "croprd.jpg", pesrplot], outdir + - self.varname + "_denovo.png") # combine rd pe and sr together - else: - STR1 = self.chr + ":" + \ - '{0:,}'.format(int(self.start)) + '-' + \ - '{0:,}'.format(int(self.end)) + " (hg38)" - outfile = 'info.jpg' - words(STR1, STR2, outfile, 50) - vstack(['info.jpg', pesrplot], outdir + - self.varname + "_denovo.png") - - -class VariantInfo(): - def __init__(self, pedfile, prefix): - self.pedfile = pedfile - self.prefixdir = {} - if os.path.isfile(prefix): - self.prefixfile = prefix - self.prefix = set([]) - with open(self.prefixfile, "r") as f: - for line in f: - if "#" not in line: - prefix, sample = line.rstrip().split() - self.prefixdir[sample] = prefix - self.prefix.add(prefix) - else: - self.prefix = prefix - famdct = {} - reversedct = {} - self.samplelist = [] - with open(pedfile, "r") as f: - for line in f: - dat = line.split() - [fam, sample, father, mother] = dat[0:4] - if father + "," + mother not in famdct.keys(): - famdct[father + "," + mother] = [sample] - else: - famdct[father + "," + mother].append(sample) - reversedct[sample] = father + "," + mother - self.samplelist.append(sample) - self.famdct = famdct - self.reversedct = reversedct - # QC - # if self.prefixdir!={}: - # if set(self.samplelist)!=set(self.prefixdir.keys()): - # raise Exception("prefix file and ped file has samples mismatch") - - def getprefix(self, sample): - if self.prefixdir == {}: - return self.prefix - else: - return self.prefixdir[sample] - - def getnuclear(self, sample): - parents = self.reversedct[sample] - if parents != "0,0": - kids = self.famdct[parents].copy() - kids.remove(sample) - return sample + ',' + parents - else: - return sample - - -class GetVariants(): - def __init__(self, inputfile, pedfile, prefix): - self.inputfile = inputfile - self.variants = [] - self.variantinfo = VariantInfo(pedfile, prefix) - with open(inputfile, "r") as f: - for line in f: - if "#" not in line: - dat = line.rstrip().split("\t") - [chr, start, end, name, type, samples] = dat[0:6] - sample = samples.split(',')[0] - varname = samples.split(',')[0] + '_' + name - if "," in sample: - raise Exception( - "should only have 1 sample per variant") - prefix = self.variantinfo.getprefix(sample) - nuclearfam = 
self.variantinfo.getnuclear(sample) - variant = Variant(chr, start, end, name, - type, nuclearfam, varname, prefix) - self.variants.append(variant) - - def GetRdfiles(self): - with open(self.inputfile + ".igv", "w") as g: - if self.variantinfo.prefixdir != {}: - for prefix in self.variantinfo.prefix: - open(self.inputfile + '_' + prefix + ".txt", 'w').close() - else: - open(self.inputfile + '_' + - self.variantinfo.prefix + ".txt", 'w').close() - for variant in self.variants: - f = open(self.inputfile + '_' + variant.prefix + ".txt", 'a') - f.write("\t".join([variant.chr, variant.start, variant.end, - variant.name, variant.type, variant.sample]) + '\n') - g.write("\t".join([variant.chr, variant.start, variant.end, - variant.name, variant.type, variant.sample, variant.varname]) + '\n') - f.close() - - -class GetDenovoPlots(): - def __init__(self, inputfile, pedfile, prefix, pedir, rddir, outdir, flank, build="hg38", GetVariantFunc=GetVariants): - self.variants = GetVariantFunc(inputfile, pedfile, prefix).variants - if pedir[-1] == "/": - self.pedir = pedir - else: - self.pedir = pedir + "/" - if rddir[-1] == "/": - self.rddir = rddir - else: - self.rddir = rddir + "/" - if outdir[-1] == "/": - self.outdir = outdir - else: - self.outdir = outdir + "/" - self.build = build - self.flank = flank - - def getplots(self): - for variant in self.variants: - variant.makeplot(self.pedir, self.rddir, - self.outdir, self.flank, self.build) - -# Main block - - -def main(): - parser = argparse.ArgumentParser( - description=__doc__, - formatter_class=argparse.RawDescriptionHelpFormatter) - parser.add_argument('varfile') - parser.add_argument('pedfile') - parser.add_argument('prefix') - parser.add_argument('flank') - parser.add_argument('pedir') - parser.add_argument('rddir') - parser.add_argument('outdir') - args = parser.parse_args() - obj = GetDenovoPlots(args.varfile, args.pedfile, args.prefix, args.pedir, - args.rddir, args.outdir, int(args.flank), "hg38", GetVariants) - obj.getplots() - - -if __name__ == '__main__': - main() diff --git a/dockerfiles/igv/arial.ttf b/dockerfiles/igv/arial.ttf deleted file mode 100755 index 886789b85b4b4e662519fcb7fe4d88ddf2205c5b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 311636 zcmeFa30zfG_cy-wKIaZhf;gcd97YEaXK_kV!8B(<12g3cS3#KqoO;Ztv<%J6(#*`% ztjwnCkfB+cnU$HDscD*1!()?~Poom||6Tjs3uyT~PyhFQ|NqbL{dg|lwdb|hUVH6z zhP&^91Q9g>(Wy8kZQzZEZfl%R`mY>yxh%UWc-7%=UCtnpZv9#nIOG}X6luZ_r6hv)l zG^RNK{c1=HZJtaPt24Ii9$k8U%giiThA$(C?nOuXNP!<%#=I`vy;X;{MdJ!2wS^KpWFg^Tg{> zVv}2;1)=lo`Zt-c;%erbJgVYWmH(`Y+Yk?GvPs2xwt||xrQ%piRNdrbPaJa;@{uQw zISM)MiDQmJzV^g1M@?_^#4$%rM|k3xqo#SDIOeG7bWa>})U1amjyY=P@We4k&A#=- zF-H+Edg7QPBhVAa92rwRam-PRRx0kx<7zQi#r=3JEgn*Ff0e&l#p|iK$ukJ?dM&o9 z{04{zw|Gaz8>%|TRJ;-5K`p*i@y06uXBBUXcs(pD)^jM=X=%#Voa?lVQ27xm9p>BX&kpJ`CBSI*i~OG3pn53%%7>^YAh}1Qaa6~Vsg?%a$sFM$)M+e zbjnrnAyh<#K$)kOLH&>_K?|l1z(mN=st&JfwXBtE2=q{! zdI>z!!Oa2Lom8M?oq{~({QrK|3~+QpOL>s7OpS$gV<24!TIR8ZQJ4pVhNI6igmFMR zMrY74^gU7O+=2Ss&Iv8DmZz(h%scT<8s*k(c}3`#$5V{DGp|{T?J`Bxo35mqRMRhO zmrL6KBf)KgN9v&nv%!-s#{p>)P;Sc3GoB5wg{FZ{RZ^QXXeb-%N*$D)7bfp~ra4_J zm3QV@H!LMp-LfVucGB*Qb`&jD>qDfJ}vI93~I;4O2YQ^1L%Y9s}!;?WR`eIcVvC zg>f$Hf#)R`d2BIkNxbfg)u_u*$51Pp^-#;fD-r9!P_nai@XWKVn0CedOzylY`P^Vy zBl}xk$p$`kt=rVFX^~Evp>)K)iKSuNWC_^%%(I25FP4M#VAk+j#+7FxmaFdgSSr@W z1n@J|Y?xlE);67V8}d!rP22X`zkxN%Yk)Oj%E-EByJPz+gr>aSj`eSz-FPkWN;X#$ z*EZL%_nc>z@*123+^+bTHphIkRa@_R^Y+d&SPG83N?12+Q#=~e;&`o1SLLjEQ)?^% zuOU;5+&6m}Q(`Z7maqu=aeajF? 
z;3qDkmJ1?Y)N+JPfI1U41i`@t<$`At+6HaECc(*Y&#nt{%x90TnL2b=lj}{Fguw0# zbS)vEYW#sDF`vxX<3zJ9q0hocTaLD%!O50TSE5*Bd!zCb5~X+u{5hROmGvqo&hDK- zq%Sp7W(xvB$yC=ZCUAk)O89iRmXRxJMe;bnIOg7sccAPAcQT9S+wbeB)`K2X&PJhfI^Uh*s^YDYtsr*u32^u}JbjJlq%Uo*)@PzR(?L z$YE?G+Tf%2+C$uK@*WaXf#y*StjYsYSIe|=hdh_fHO;ktVv>o7Osd&pGFeQL1%;L2 zw~#<#QVm$PWU>I)Issb~j=~Deuy8Sx&1zy0g_w1Z7}hOFtph?5RL34Mq+4v3^K`8Z zX3RZomyWtvwuB&^*NC7~TE2GaFewXQm^mw(m5WE*L!Zk=By~a(3hlN#a#Y1lR3r%@ z^vu|S6OyF_QBuU@!2%H?l~J*{Da7h{>qc^Ta$z%idl2j^24>Zv+AKDCU)OQO5gq68 z99;5bTw}6uI|v8~Wb?_6Fc~;Nce%f$`}6?1#eto#ea(_hL(iGcVt?&?MqD6`$qu{v z>LnZ4wo~n$zobz@#V&a}I2N3_c6h*v_UleW-i~0y)e#m+Q>~Wn<;`J3FfFs@+psd_ zBd=5gT!%Uh$E~QIc|t}XGemS~vWoJ9uy~bd><8G?6HLaMBx@zsSQVoU2LHWw>~Z(+ z>0UaDQRv4@zd?jIvX$gS+2lG<5hy}@77E02*`(zgY zP;@KxWOHRT5?;Wu*3ZmIf2sIw(}+`lkRzvFX_mHjNKecf_0&_R%ome2W=*zL459zb z!Z1A}OBp7*6jMepdq8Do)6=q&!c5?l0v?E%Oq}whdf!}#Y& zM9AA3b}QltTFv7&{_y@r8mZH<^ylhDj6oe1@VW~k8Z5<|p}8%WM8pb$iKlo=D?Ygiy?20UrGmlD+=KiB_$7?!LC?pvK?9oZ%Czt3*Xwr(^P^QY-A?v&O* zuQmx6!f733=g0HFa+elMp(u)&qzW_DL;stt2Sxae3vggZ3M_Si{E>8t_z`P0tIx3t zA<3R+*CS9nbK>}k;uka$&GO^ykIDg@GXp7p z1_Z-ZqV5FBj@i?WH-xS0s777Y#F`M?B*CQ|{6}CeWH`7HjXmP{ooqFNR%Wz|VR_MB zR?Z{Mj~gyy$bZ3ayoCcM@M^4Nf4uFBf8ZxWgm;eWIwHs9vxO3&M(nNkve+$UVSA*k z)LvEwlas$DT30r*ti|3^HqYK#Hlyz?_SL2L1n!MIX7`s6Y(&}--~?nppw=Vd$4hpH zpDlSQ{AS6Y{2!O7Ljnwe=f@!gK;5G1JpPDGKrsw%q&3)NjCPYY&X>M>Ftz$Y5Q8-*q>S zukXx!_8B2ChY@#gggWz&V4zQ{fFc+T< z03!ewIzJ`ZDoi76&H-;k@~gm?r21KV=WTm#_>vLT^FO$lRSv!Bx)pibLzf=B`KHI4 zT$W(LbFtt>FI_gRVnOTN2h({sOc?gWsT z*!t)vSU7)p%}>Ay5Dxr);JXIu0i@MULLe0)ApGYZzeFj7>kEsJ%A(TUzp?_E*IWpH zFGglVA_OB=g7tp1WAToQwk^`L-~Hyf`C`?C+m=81@UrDk!d>ED>l@d9x8icR&yvOTbaXeR_W=@ z{OQd=9MOiw3R^PLXWn@0Wez+dju-g~T6n&K;T9W<#|C=`2giHI2WNO^1n(8^mF}@W zbb`lZoSB%QfE5~qIkX{YbK5r7#gw82T13mGlGvvgpbL{ zn7`;vGmD!W;2`gyFPTy69$U^{&HbWPFXeDF@~|D7hzd6!Y#UyM;bY)en;N7Pk)g8_u?;ae(Iz6w(xaR zZhHKomGjoIRl#T8WO+<`l8M)EdmuXhk{AE{&H=1%qA(0&_9ZcI(031fn~Jv4J*uEC;3OxA583PQ$BEC@IXfRK9J?VujV z`)~LxMdN$@92y<>QcuzIh+p{B++~(&h#KKNmCs3z^Jz2k1`$E>VdKk^nXf+PaDG+O z88fYA5#nRIXBSO6_*Os%fb;87=)-uOo@eCQkgUddosN&eoa2!$xp^iHnEldqZ!eqo z)(tc6C~xmfJh^Q7!&|Sq;(^uouRXbC6O(QkH%N5AH_`L@t1rC#!Rs&4n2iP(&Br|R zW6Z|u!MG6fLkDV6S}YT+v!(gUWtQ1i74Q+|4&yy>SU-u`5hh_)mx@LM|b|oc3U_DL}}o6o@%DxHcn#$R$T( znpxNw{>Sjk9>5(p6;Ei^{6HM+&Y)7#(Az@E~X#6Aa0JFu}NQD2>#aJh3G+rt>fi z(-0XslFuPqVEvg~8ie}JMMg?lf>KG5$Ed<*tb2+XCVZb6smC$9SQ4l|7Ex*XzFyz# z{SrA(eILEe9PHE)ZRe_)Ydb#>$JquzL4rDZtn2nWWKgs^!N^ILVmu*}< zcHbNv+Maj3v-6c!+pa@+7-Un_G|lNb~R!zv2PFW56fZdkCI5dDo?db zHfKy@elg?o%92UYHX+AamsgjADH#Ecw}UNFC-xh_B~v{Ui&t$BSXifN64vdQCOPG{ zge$fvX@?`dxFp3V4qv)_MEST5$!NUsIs9RG<42ykK-dxrKgae7N#O+3xFCLd7QkPH zvg<&e1-g3_;0me2&~uI=c#*qH`w)X{F`HCKNG_N{1-Ho=W$;qrNkTDC7L)Q&N%4AE zl3-9Fdy1d3f$ZG0$s4(0`MJ}g11ioN^5&b;J!==uuNpSVbDuVB#zkvS&B2T~Cp%91 z3NwP@r!UiISgne$mo?=(*E-Z^vgD!6oHgz1Rak57?;B+u=9_4qY@KWUQTvzQ(WkIi z@!5rE7oS_aq1WbK=Kd(d)X-~~by#v}>G{d?OIyt|lQT3#JEgLN6Yj5#(%*OO%|g1vy)zR1iER(^m#tqi<1ah#Xr(vLB=GW)~L|MS(|k6d&0)~Bw#V(a9{xKzcgDK*>Iu=NM(QA|NQGOzWgF-Wj7+Ei()o>3>swsgzpEoL~08cuTyfhG*sGWm-(SO zsA)kJP^V9VMa3CY%svF3q%3-6|Eewv+izh3^sqP~00sttsf2HRG(i9qe672w;}C*} zPPD)Yj30#uCQTKKkA?)_`kvGev=Cs!-=BbQB^qP7i7QyuziNBnSU_AD*c{j%=nBXI z(Fde}CbtXsa174_-2EVA5F`dhlo#CqHS{2#rG{7ucX%A_b{_lDkOz?{2)_d~7C$bG z^$*7|_GlCYgDGFPgx}rX^3bn4FAyKN@v8CbuIYRhbMht( z6h4B~XBOVjr(62d6Es>jSTTlQN%wH&jU1xwtr5c$A2o>$aV#s8;R$|LJ z8$L5Q8ybdf6OB`ZY#<15torlbg9|;kZRWCzlBz|E#NtI69*d4%xv8V0L;m}lZ=Uqa z>60ITt#>_$?3#7_nLWbW`cNQ3%0RhNS*<8RRZ+|`;<6R5!0c8LP&cv{TFvx?R+BmA zc5VPi2hxp9P$^B@U}bS@gSF8rLCg42uOUjX8m_xsQLu8I(hBJX+6RIVu~q3gkfaN= zU=4eHPbKMrdS*43Q=l6FLnEWjBB5an=>&Kx!vV-J)l^pA=u!ctAiTw)x}0g%g*>1Z 
zhdGKs6`}}A7>TIq8k+Ci&S}P!U)_cjj>{zCYq*LB&YqK$_b#m-GP*biw-P*Mgx<}XkaQI(YcGy z1nF{3^q3M}jvyav_Y@PNe5?a|54;y}AjuEtD{;gm14TB~ScYRLOueWDrT&Jd|k|m)Igv`peqHioI3Snf@=pH z0x(B5Kmf9)1Th6&Q2-{@h?zq%6#e+z=Oa$ZB{BCoB@0jxG1s{|A#l}1K#B0bB!|yA zbF3+84p~_t`XE~xMS-67+<|xsUmZZDl)Vf4kdZbR!>-m#e4DfV*heFL(6Ukz5iv+^ zrO-V<|AeG!mR@LcRP|CRHckx@E=4cs z&Y(a+``eN2obRiujeXyK#@?uJuwPGMxpBhXc$4y}A~#}lOp)TsLbzUcDX4frIbID0 zPgmlM-Ve!Ulfd>v%>*iVn%k3xpX2r%49I~9##kHj7;+VJ8LZnKqpOSSHMne!cL`(V zpYkpY)nbk}Lkc67Mepd3IQb0#dkyP1)=*O#GE*V@9YuQ?euY5njRvOFd-@owy(%Ri zmJeJ068tQoysex_h(R?`XbD9VFwzy~$4q__Da_1Nh{$j4V2W)>ZBB_P@G(bf12Q(q z^muOW)(AxmFddP#+eZYA_sDul?Aa|6QM)-CgII_)6rUMBr=-2*v>4W++d`=g$V<#G z7wx%RlwS_-ztP=vxhP*Hqg?UE(X6pJM}h8`Cg8n+{6+WRdI3SKEKIS30_NUkL5w3| zN+Z@QI>#7|Gv^=YxI%!>Ke|e8SdQyHE+q41gmRE#u);;DJ?x71pL1Xu7sPrV?hdxW zkemVa;Y}yEOahxB;TaM%pTwB1WL)iDsNo=x_SxJ~=CS*`7wVkqg7Ni}^bBh^HpX(z z<>)gP?guI!nYaAT__eRz|9E@hw6hofs$=r3bFZ(H(;LTLaM9#_+je&ri}zh}LEXlO zI`0&BUU5a!J-2nfPjiK2uFpX)0d|eDtI=vYWx7}^}#9@-z0L&z53 z5AeBp0*P3SnRj{$o+)q{Q}DFF1<@QMYY1Eh#z#Nl`MCf$Hpo*3@%cs@ZYbfnE?Co< ze+X`ac!5zq3iDNNa04c{MN>61!qHr5pc_%<)I7OCrl3%;Q`l{3lyfL9A6R3EZ+vjs z$1@&ia%ml9^G7UsR8HTyZRo-=71wkw5m#TjV9@Qack-z5A;1obF^YB}%;xV#1ryLK zOw&jBB!g$`5q$|A3iA&{BH;#%NP*mq@df+1e#GEf|x{ZmKZl zXh90>)1%1&)}vsxg!j<0Wf`xGw_K!LWRYoYqAz-R5rD(`QH~;oS7P#u~ zYd*~$-MjO~)jQi)-MCZqvf_2ivw!J)FaE+-mbG;qg`@CVtYc2CrWLFc@_SOymRsvCN?D0x?o*&RB%+ZS)FQ|7MvEHr_Q&v zx)ucINB1ZG?E5(MapceWhkb|h4<))1fkHXs%J{40Iv4!$TvJ@1S^u7wby?jGfIkGC zn*xB&1xL8(pvGL9uFZfwT~6>JOz`1^-1a$gZlbR>K9G1%d_0$#l*W=vb~J=&9MKD@ zv$dtnTPaq0QiAZG5%X@KoO9zGP|l&^9-#aL=f)gUH-K{P!3q`$QfA>efO2-G8=t|5 z0hIq7CWlr;^O2@wH%jj9W~y-Oq%7DdR)TbJB>!?<=*EZYZl8P8!FkI*y=uz(KJG`B zU-87FOPB1(wkp55W!$*6U3YECo?LToUFS*Zp*LQ7?d{iIeU}it5!qJh5XR6Y#8`iQ zomI?;WubxMXmN$j)ZlLjj}C9h-<+>hd8?ug`9r)zq5!<3GrcpTGxFQ=-!i@J`P}rS z?W>ThR4lM%5RX%B8z~O6O%Yqg_iZ1B{ucN${CV`0=tLeyUjzuN!{h@ZCpdzRN~F_t z9dt66Q+LjAwmIc|4n!cKCBXw6hyWAF;j2y#L^wGRfumSRX(*imTKR}oa5;p3y@5K~ zxzzo?@zo-lW`v_+%P|NupI~N=mz%?R`9DP;{sXq!d7RX+pA8Ih*CP)%$H6&X1i0ZR zsA{jWJ16`m`|V|azV^3^9_&m$dBu`Pwk=<_1qOU|;8@m&nKx%|cx2s=gQch5c;m(2 zz4gxTXoXvap?Mjj;uc=f2bO!8OJ;>~l{{D;FVB&e$|j3jwWt=m*KM&2sCHoGQ$o-z zB^!_%q#)sCUa`Raf7k4LWWDco_ZgZ!obmZObcRI1X*d^wWC;0VJ;S>#2b}F-F{y_j z(QCWARdiJ_Ei_G?W|^i|c;tn+O1>>cTjVkbFG7nSxe znxZvZXQgMAEG=A$z=dCGzqZ|8a%ZoNeIL>uvu!DUsH8pp+jIaXY(o+)=x!8tH;TF& z{5!-#_;;wmze5fF9ctu3+0Z@twNupMlueT(iL_s~_Q{Ko5vU;Ci#k6ZZU{GqF9>f7 zzZo_;!}0KC;ZMVIJiIWbcFk2uE#3H@K0p(a9YaBlDMPc0=s;Z#V%Q;@l?a8FAIcq4TDlS5p|v^J+zvgRY)7Wz(c5 zX`U8!eJ7SFcO#E2vUO;n!NN4*97~1$Re--iTEHnZALnQ{pN#a)HejkOo|&CRQiH9zT-|2I70duKk_}NG#nww z(vZn$?MI78pWy>Tsxx+3S+Uvx>Yl`GqK?eb@+od`b4!wVl5l z(^?PvBteZ1bt(H<}N0@0Se+erdN7=RT(cUQoR8i^9HQ2tGLlkLS-No z3X(bz;krtYYbikwF>T=PIcJj~*HSoa&-Ig_k9q;`b7*joE1YiLm|FD8kzau{iUpYAJ7m-~KyVFiy>6EMqiim8B z3U<|PK!C`@WVpNLteVK=2T%cy10)e~NKk6H`t9jk8eP^7tNYS%z$sf?pApDg~-+RSs`My%T{RUBEpZhi4{+QKKR?&_;kZ(lIX1QXmLFDk_T23IRJ9z_;plNC8TRF6wQ zu~BnOjau`d!KTb*YQ|9|RGaJ6fsP^4aFcExqmH%?b`E!s@=SG}=b4Yp_7{6rn65N0 zRri|qId^;hWjbjovARoy5__?u#98bq_YDwgJeRAh)w`rSZI808;#MmhWrW?PeU9G) z?|$EML_XsD!gJj8qa|kL@ufDtb@8o(Z=HPW$&qbTb2w#>;8x85#GNSzsc{aoWM{S% zto-ljHN>UtpuRF{Fgx>kQKr_N)-vw%<@2;@?n~TPyKix8ZcPR)(r_39Lt^cj;WFh& z++hUi!B-RijMX38s1Cy(h0&N5czmnSQ8X7KZufMJM)ZsaGRsJPj^=bEUUZw0YRB#I zWMBb=F~)&$PT3tkWZqB#?qoC-j^!%F*5qaivICnvvg&l(9Cm(t9`JF>4o6eRg9u%R zp!rU?>}&=SQMK76`yTcvA`cohcA0h^`I(CobPKB2T;{$GX{G6%)upf*94AJK4)*9S zcEWoCiz3{c!(&j)F9f*;|7nu7g#L3}O>Wjf*2moYFT!ffh+m)=xkJ-y^!E7p$sGu_ zNQloNg*L;>(RHvx=<7@%PT>&uKj$G);MEl^JLry*&w!^-Aujq;?(J 
zVNMuFJvd4!JNMFsc0&}zwZW|p?lkwMEAA8qh?J}wztnSael9H4lhCd%{1Y-49J5oGCXTY67fAeuE!hX!97y3#71YI-S=37 zT>04EP1R@Z-j?ln_Oa4;!C>w^?0!|ewDYdl-Vo=U{6M^V*QqzbX`EPx{sB(oVjtz! zA-|JZP00HMX9YVb(#a)qXF1Zw5KU5w;pj6?4|BqWoJ=5kQ@D1D^A7nA6=^1&`<4Bs z{pQ!47N;JljYwXL-yU&QvpVbbY@Jmt_e_$T&CS-yjyu_1+FjOX#67m(TVHj&?)pG_ z+ww>Ihpx{wPxs_P{Ef%$4A~)A;CB#zLoFwdzdp?XXZHD-eRJxogo&PTkb z9}zNe@JAF4rxa|OIcN$t-O@|}FObXo*VK@8NGe8h-<8?B|FWe|tXHc2-Rx~_&Bq_q zWxo_lSoVkEeb1>pnYDHPfsJa;w&1yv@G$uXG!kL|&e2Go=5tC`DHe8mOjeUu_rTvm zwwI#lZ&TSeBpy?QpL`mN&7j2N- z*AuXZJjK>xTd}>rt-rn6aktxA;wkZt2sC?|z0LksPph}pzrwWKzQTQ_?@IqF`z`LZ zp0(baeRpYFtVgRd)=-%U{ggkeNw=Md*gIEaLMZr=bAjKFS+ z=Abo@S+L#!{<3tiH?4G+Ul=`Kv^*R|p|*>4i|F#X5^7!nU1aBEon?Mv11y3n?VT zq+Mtiu$mv&Eqpc4I5fmeIXkczDyC;em{L*D?YPcuV?g;fBY4N5J7zfA9LF3A_w@%1 z2rt^Df_5!qC>Ya-x|dzBwDDvmLfGkAnk2SC-U>O}HEO>vsy~cS6B^u30s-n>pFu_Z zj5}A-V1s)gk}8*q48S|hcrb}w$Z~*i+dW-I@2S+6cz56#>P6~G{zX&<;}KT0!hN#S z$zy_y-H~SynWqM#XOVEWGf1Bx96V4f!U`=$0;d@Z!iG$RM%YmQ%A~(Ud}zt!Y@;-* z^M%V+%wvDQT~bZAU*0+WD$Bhz&b6pQdo{fN%7m-*F~gm##Dgdjkv8U#)lro~Vw0*s zaX=_7E3?H@2`?D28@xB3cslBFr<8DAG?kX>l&8yz>(Wn`m0f_Gk)qI=S!hHT!SfDQ zMF1oCPhNQFG}7#N**C+wM7_fndCwL-$m3kozY@AM9PE&L-6(oO3LG5*o*|+hckP%9 z5eshVH)23~;(++vAy&_%mP=>K>Yxka11{>NpC1+z7mg8UiH*0gvDN3#Im@NpH~JUZ zABWCg+Plwh-hDAOr2l=c;Klc|xwkaI+|7jN;1|=3rL_iZJRpkQH9>_0 z;HL*~R>2{>--ee--us`jI)WuT8zB*6JR9si`G?VL0Por2$ z8;HKO*S#o{O80QON71!E>rwJ)A4>R%laYkO zZi}Gj!@)&HqJJuzfLgGWgy!r)48}&tE?>DX}tjhC@9r#v#|Jb}5p{vWtif{Uhm==mK_-0`pI~(;KDt18$ zmBhe4F^M6WwF(|Zh6=Gua0Zw-2wIfp(}BqNY_mq7bDOKHd#-i7=UN{kj>0U2bwsIk zK`OBcb2GSTzF0~QN{gzi@%zG@>{QArj&qRj1mFIM{R$kaKnEt)H_^ATuT@tgx@o=L7RKwTySQbdn-Gi%@ay=RPl1MgU(}4vD}GjdZ?$Dl{)Bb z=U#dq`gJ6Rpeyi9bf%NI0wp*%3-7W=5l%mP2P2y#cc|sY$vnb@4n@17;$_ioQL!<~ z_?ru&8<3Di-jltOUp;(AZ_y7>h#P-Ko<)uKaYe3=e}_KqW%O}j_dRuYa2%PE(ZSDb zOR$7Qz1zZj*T~GluQDKam78&uFk-El;E&l*Rag{b-51Lkz00H0lmmFaAw~}g|3L3X zb%$6!uwkG$FosSQ*}%PGfzd6C`2GI;tii&MxwAIaep@Tn#xy#sw&$#t|9I9r0@At) z{q?lCsg5P;`qnM1Ypatt)NQWYUU#VOSe;U*_o=87>duPA28rzKo~x10Kx1Rv#fv@n zzK&n7SchLgy{>7d2@C7n>W|e+^|6a^JCfMYbA^_lJpJQ6S9q6SVSt`5j)V=(G+`va z%t$>PsTxV(&UGW#j}%9u_U}QAr^tq%aY4}d>3cfbbHxvZkMWH8MB&as@j>FCm_g%% zdQJtI<5`-rmr=A0&ZA@&pmO0w!6(%0VE8fkKKOiy-;0&UL|2S`8|$Kd-`I6ADFy-h zuH8hru^inKn9h5f#})9A9g!PZq;KIPdnkDBFD;Z9KI}xD`KN@eDKLOvAr` z;yeLnqi`?)!*Zk;WK*d7%x7H57|BAMfA1Mr8vLrjIftU-3W|=isn`zS$w&CyR?fTt zMzrf&+{xTC&VT0}K?vv?pfF@)8N`^@a{`S&@=u6$3O%)P-|Jo%gg6RVF*QMVuENTU z2xZaQb5f3ixgU>06g-ZP^1G141fjYeD{#8|yWM;vSjep48+(Nk$cqI%-c2=m$6y5`CM&YM?n`G3s`>d@gZVrH)o|v>sV2%6!g<3u zJ$m|dgKB<${yV}U{AvQ~MvPd%rWg}qals6FN%WiwGVi%x>Nn$l*YPvUbc7-2>wy53 zT*g@2kz-&&zI-Vsn$skYfj)f=>OhYocfd1{5R68?XI6HPd^F!0pcP|%n1$Vi#)@(J8q|Bn{jTHJ$boHVSBeZs9ZSF^0sgXPvwW!u36?ED@Q|_L zQY-S?$T9+2E|qj~1F|!mZCeO&g^Ofd(}R?2?8$$M>j(9`qzP$(01DiY$ssXPDk}CK z61}bOV7F&4#m_&tncb0{xA`QNEm3Hbwu(v#C0zL?mbwaAlQ zbDeWTbCXv%m&BIk-CX)u@^f~?c_jLM;)mq-h3*mIQF$X1bHj7<=9Za8Sw<&Xo%0hb zoR0_(yB{lktn71#HZ*^ZGCN<&Q{%8o!6NH)g_5XW6An8tNf?s=l15RzAYv~|#^FzC zgT1L3c^S|sGfVM&K}wpUp~7&)j{kea=?Fy#!$~iUB*h+kxHw`qIW&7D96<8%$MpfI zcM}Q=$;!f5z#KySoD|dG=V>Z3M|u?(d4LupFB=EdJ=}gC_cQ7cNZtC(Kx7pL+k(n1UL=2Oa=8%~>8)Hyo3;`5T z>ToLCh5&YGQxruB-4(itnQJ6|A6v@?W)FMEU(h&c{M;wc8nwJ`+U$z5O!STX^70W| z1`nOF+Wpc!NzXl3Z{PKIAa5JzUiM3=^jE!lO*$|;5H20?aB)ag8cb?M=Beu6t~;yY zR|_xhcW_zD?>eGa+_HZ5WtgGkvg2ew#$_TKhT4>-6x5X$OZDavgU@NGt1T~1d2E^u?qD%_HH#I)KiRAIj>n*`6~oi0K6-Y> zJiX%V`qD;d8j*7gru)x!vU*{_(9%XgX5rC`9yJDq$8Pb0xje@quXj&DTcqLfv^2EA ze-2`k-*B)7sy}J2#p#sc%)s*+p4rLVoq^IC!Zn~fyJ4E~LBR+W(~O|6tlt&8vUSdq zqO-GK{c72$D_V!w{r=%+|Kxwx-`YHFQPF@O@2eR;cx2harp8&%EqMNrSUGBBRrQlM z4<0wPPoD=?4~zVwb?&gr%;?9bUcC0)8NZGXSu&{Yxt))BubeTnzN*)dyJqyRZyUX+ 
zsZaS?YnvwDH~x8cI6b>Bs@u;yzki=;0~%^;FKDE7W&t}W&XryRUyl>gFkW|9Os`Xf zo+vD{O+qIT|3*E#pK|U68|Sue+}JvIqxgr`U;U~TZMqhNr{#EXXD8}N+8eb-Yonvl ziFC`%xsQD%FxILk%@#+YY8LFaLe09^&3-V-)jC~j#4U$i?N`#EAhihi7={Bd0xw7c zh~_AHY=Qb03CwNuH)VpMe{AEk@Bib7xMIWOpW^5J+NV#w@Yz#8Ofx<7!!)|5yOE=G z3+S;wyG37R?H%piFkqNA%stfGIAB_zc}4U3EF54Rsisq@61OW=T%2-wd_8-YCm0Gx zl8JbJo+VlUev(cV6{2Pxl5_bS{>=Yj@4dsLD)aZ@^E~IwoaucsHPezzdLac!2;?9| zIw(aEBPc~FN>Q)_Dk6f4Aoc=SS<9*fFrY3LL`6||b=_SXtY1`g#j?wyNM?BN&vPar zvVQmXzSs3$*Za>K$jmd7nNyzfc|P^ND=MpO;FvR+%D7zD!&qaimMSZXi_+0ZJ|JtY zmO3c%wNV2~@PpQ#lBGIode~o9i0z?Lv-u-$Lgk7^8&ZWRJf@@(Fb$K#P^1D${we-> z{-u5eyKCQVQUe%Ji{PX#+IH9WYDew}<=`F`DYuk3O@N){ z=tA(UnKGt5Iu3l8^}clD2_83;98mWTyjx`hA3^p)VI*B`7|z zy0B6pzB;)m{qDObzIW|Yy9>__=J}_ssMPjuSp0|Tduyll8Zjr~pY&*PN#y**g_CBl zkiUBFwRul1sGGB~C76gO`cLdRa^|YtH_RH+I^=@Ey-ULV7Bn_C7ms=7%v8V0%RcEy zqKFVqF~dR35vLTFdP*9*PV0G(XI8jwYql>J(3{ld7t#dyY;pXJT$*N#c8i`PK#Tfz2i> z2n){JEXjT7DLt-w2R~1$VtLyXFixWk_eH!+uwjx-it_W?l<+ACD4e&|7V59BDFEmI z42qT%O$P$q>f~TUxPY8>m@Je}goT&sLMW!$07$O+G#waLAE^byn)?B%wy1B~Q{+f< z?5KHZOOY`hF6vj%r-A{zIVr^%$jAa_v_dLc%|$I0%xiZgS-y-1zcrYYA_hF@d14}O z3=^aChvo-uaow=ti488$NsC!q=pmX+sI63Di9HaKG!Q7D*+S^XLNt>vkstX3o3-cl z?A^O|W$%9DEjH^9>$C4a{urxx=pk0|=DYqNme>HpiYp-VSdi`~F!K<&a*{@#xQ1*`xKa~CRV~?@&haP6t4;}kjuBe>X zGI5#76RMfNe(9a*X!bGo=D^;CjZye*yU(d6R;MM_VMg5>zBl$L=FTSj_L>hcvp&Ym z`g7ffJstB*iFW=(Tk?iN%$+CNYC!Y-Jf9+!BN%+1<{DiHV2!zjOA5a)R0`LbYB92! zRf4&6yj(6{C;fVU(B#`(^*)YwUR!d7(m)fVm{04P7|pqf(dR!V#)&yWx(I#&xdCMa z=7Y#|8duk=sEow{-FVW-*OfKIrCvpN_^siuFR6{Qp6T9k>9^*9KI;ED2QUf#Pcy(P zl%7sZ;Vun_Hs}B2*pIt1_Ufu{iJ6H>m&JS*_(h+Y}jmg1eU{jnnQYo zC*&G=iY&_^|KGN=DxeBrgKj>YIeY{{{>YIdxiT36CVf{@k0yKarn~@U+%&)D$75kg6k&Vn8M)(RyUv#O&C3+U}NOrUcJ&EwQHZVKj%WVs+ZYh#mj-!?E$|q z;A)`-IJnhrb}*aEJ8BtYOgfSP$CRL8K;vX~m%c8_VMz?}v_IQ2O~O3ZaKq5Qo5}le8wl8&s}} z%m!HX^bhKic!FT>_Bs~=bmo4jcbQz^#bgk_buuOee%bf0tavs1C7X6dHpPo~Eq|4T zv-e%jK4lxT1K5k3n12&7#cs;}V-t-7Lm=~3q46R6Uvsjc9eAe>f-wbrF3O~S6jV!S z9MJ@D4Dz_4qWCb0yiOR^nDsGW?t)7Xo7-briOC4C!zHHGfHgEpvX`*^K`CjoI)Qx% zB1RjSn#ekbM@j6y zN=w)e?9(ZrH4H)Z3}TZctUSt{&izn_qI=QdIt(G{rr0W7U(%=OO`$^6Mhx(EE7u)<1Z_1nK*wcGnRLB^v~>hgN50FHAM2*_sOjDDfU=)A|J~M z1z_y)*9?aYKS&WoWop<%+R{=uJ&K&N3xR@^+}qg{8I&GcJgDS!^V!MM)8ndUInSZ-4K)(m6Al%e^95<`lIS5A^9$rGdF6BCn@icc>+ zt8zA*$*-tc#jh_~Rs5j&FbgWO)$0Sbs{&rCO5}l8B4v%DWPQl#?@XBRC>NJtUUMm$ z#x!PSCFvq1KU!R)q^eJ}E8>8^sX?g;K0(CWn?rI1Jz32_FSKsqmDOpH_MA(e{!ap$ zUof1)tE|^HQ1UcY)!?mgqL$UViI3UEdT4=E+Amld{34e+0Tf(7+3U2M5Ftd+jg)zv zCXW*4$sg5{Azc@v2G>YVM$(h&a$vBVb5BrGuv8EyC!g+T+kA~t085r29c8NrO^*m~ zPbB9gkUdzh0BIvSr41AkwOE8UiOM!l7~m3R{stKEwy{eAl1lk})6{1s#7eehpWD82 z-h_+s>bz67utBdce^WW<=E)ytKhJa3$<3M2$o}J}e#D}N`WMFSUOHv*eSgnx*isxD z|H8P}ZhwE>=j_~Vr|uiplZZerIcvaq{F9ECT0%)y&+^{GJi3Q1GsF}h7(pn)_KtQ+ zVW=ovlY^A-uY$LcJ_kiE+|usi83`@S6Su@)jj}MAHk05Y9@L62owc2bk4%i-A^KK8Y@I7 z6t(5mv6LqdoPO!@yvjI8KNE3Q?LjMML94=F7H3IgL7Y{2YvNMelpohsMqOKgAwfz) zPvPHy9tejb+179?!U7A5c-^$d(}$coo}}O?NmBBZy|AI3(kQLU%uTX%5kD$lSGT69DMUwN2{7GbP zo`ny|{H)DfGwxu3;$&mAu=E6bBBAifa4W z{xjF1Be-ZG1+k9wfH?Cr=2vmSBjvX`vNGp%t^Aiw?4B*z*Rn6S{rK2PSGrOQF6FD0 z+?6Vv$yQ7ZY?>MrpeLVW(i6rf09*V#u33XCVa#SZq3ACRxN37kRxM3(Gf2r8LRfL( zzX;p(izdS9W?VI0b0!!*6bid?Pq=>9kudF--LIap2Kq1K>Xl$egUmXP%WvgwAbV~b z0_uIXWM_1|@ffx-irp(0Vh0RMUu(SzO@O>}mU}KYE%MxGy53{D#J4>LN#6g|demo;G z(b4E)^ZJL*pdp2)M3zSQGm)1f{JqG2q#rnu7;UG#EpZw&bYe)+pesOwP!vStXK_Bf z^Sw||(DHWVg@j*h6OE5t%G5vqila@MEF|MgC=Decj;Jv06>j=FRmBM~qoqPxw5G z$HYpLp<$V<;l*sff3F#L=BmT&HLTj+;KJPtZ%ITdmgZ=qFXfB;m&KO*6##}8Svi4_f+`N(b5U zMA|*GcA1mGSDzr?Ma2a^n+szs7bcB~$wMTg0wD{6DwN7&v_V!rPPu&G)YuT{rr#H)Z!{ zTRz6*Q{pAS_u{rW|{zm 
z{<>$Or3f32oH7voVjpP!l9Ww6klNmrrb!LjNYN)=tTsjpCS=tr<odZA0G^H3l)nAJl zRcNaD z_+oa#3+t8Q2Rbg5Z=_#3+wnqPqn;{NNRMc~I#VGK-Rw?tprS0#r`QTzrP$E!0&x4+ zoAXQ4?fgM4QCL`DsRGW5`S*e1P_g%kQc#;qi&CLPW!2lsdm$&wt4HB_4#Si_e7KXS zW26f&Go66|ou5PKE``M<;i%ig4Ux1A${21ej+WLkx4~O0m4wr^l84HBp*uoX5jBz5 zc)ie6X!;62r-;+Q^9gD=n66z)i>|R0^Ech*(#Yh>+Tjhw1*HMEJ76wynNIIHrv8+= z;*utxJI`DsZ{gm%r}ytaprpF7(3f9Oc7Aro@d25ivVXf^z`%yep71D^je-+_OV#`v z{tm{z0NURwxDaC2bYYn_?Ws&AdF2j%6}J8OwvfNLC66K3tR0trmDUhqjwOZpg$HYn z#(JlYCXy^U^VT}HwC<%kKBaD69oOo{0D{1OkiJw52j|S%d&xLG0xD$8VUkG*aUH_M zL0JaEU2q~{XCZ@C7f6&3h-^@ytD#?d;lEhN6uP32mfx@Xbhu@RtWLaTUU9>Fu^!@*Kv& zH3j{fQxh-k+wamJ`Yx|6VHJ)%h}Cpy)2K!mqg<-w2l$I(l?S8&ZPg*>c@Z6Bl#DDc z+0Y+h$>@`qzf_+2eF$;;r6_cEmYQb!@d<5&H1z`#oA|>Rz(#M%}(Et%$=_8wF z3)ei?zIsjjb8AkUI(_t*Y10kwzwyk|Z|r({%Ny4(xMuOfh1Xqi39Y3i;D>nCunz{` zM6IaN*%%%q4|5I+4=$R>Cb(y^8Sbmt72!*YR)&_Poqaf~BKP4Ct<)@+=k>|sM=>Z( z-X>3Zai3y-WAV{qUffjHBuSo_&tFW}&Y@mpSkSi0tX11${o?axIZxV6>%*VDQ2%oir8RA;m4xJAhi2xP#@}@F12W z9TAqp{MI6Pj0t_Qur3kE0~r;pgd&L3p2+uwgISmjZl5PW6848Vjc1&*o>lK+W42s3 z=ZuCaY)J<5{KWG+tp) zOSGC)ebETR0LLjs1BXmfCO9TI#}>_0&v#6BPAj<7bxHaLm#47(`0;H-=;4X>rAl^E za?p55tD<^7sjsUq@7p&pxM%d%str}Vs;agrdV3(6Xo?mWC)B}Fb8S=tqy_-gfWQfQ z1hchgk*BV2QB^Ok%0kDgqG5`V22IBZ;0@_KI2R*iJ%8VIPo?SBG&QT#>e#o_WdfCYF{)IP=s?FC1 z54}H|z1mxL>$PVrn$4mQtle-pRqRexk2v+*1>@`CN0hEP{w((4^Ce`MNag$$?RZf3 z8&UwTEeZ{m#)YPZO#Tpy0DPE_FG&Kk!)HJ_iaYA@rv}SI?ITG zXPVD;pOaphQm&U)+E=<(MwGAFH{oBTU%0X-`H}RI{Ew8fN7}=WD)M^wJ;}%ACz8rl zk;{{pr=BQKW+i79fU41@%yG{MUzlDfDabiW4dTO`N;*^-u1Gb8l}Dsq;lJ@Ooytht zfY8v`;9}*9)S`lQuG>?}H1mc23u70gR;3IcIaQ$bLKY5}!bz7C*~iYS9h~5^k_F+E zQWEEt@D|G@sD*wnU==6?8b0wiN;YsH717C-N(ba{g~S z%lS#87d|T8P{3z&(*cbf4!tU%o(W0&d-VrZQNxwwE%*5A=~-*AyXtSP+)(*Lr7Tq@ zDwkI7uT(1Ig+7T+>EHVA8bfx72fIwq?|LtG3!q@>fd-ubeP=+V$BFZk|8p zhEs1IIPk)itdiNwo*n%5o;#+qZ!+6k-hO*TV;RKb1KF|4XxOH*l#*6xy>5!LtKv7s z`0V&qF+RmLJH}gMt#Lj)cDjp?@C?uAO`d@+9?!Sh6vbEU_bcwwpfg2QzLrEOY_n|i zo#UUACRiPINFkhrKTD+AJ;7(;S}#J`3)`O7|c& z8Jp!Ml~o@ggERAGHA?lKjf%e);P0wzw7w-ZDZLHm-f|Oi%M8Y5q8oq9V!bB$w|_(3 zKGKSN%N#&y@&PD`)vnB!J!*!JJ+nfqH z^K6i8B28%l+oUupMnrh3w^8IrE&_yTKGa+ec2+5@$|2(!px~jbk|V$pMEM43qQbQX z`vM-GrpG%zhZ}7Z+lGh+lg1t2tV~l!!cw|H>!TQbV8H-P8i&OsD{2d1Xi<})A`KH- z{|JK-bRl2mnsPN<~<(A1!EB+|6A85)8+Mx%*^&4!T4d|7fn85yJW?3N%(bq+9O z7Bz|lJ>mgIqFzv#dU^vXd%67y)t4&BW|i=;>>F9v?i8l!?dr&%r^}g1+MOM5(p6U> z>#8r9zLU}x?VR(CjG4`n3dvy%Me~CaOGP6gIMa+KkGCLAQoyW}bi3u@~0Or@36mE>miiGE@LnGQT#p&|2Uq&nrx&ipndit5u0R9bnE% zIg8vp>IggCNcxGTqDA2}lK4Xnyr-cd)H6DW4>k-64USIW6GGGZ^w1*1qUdtN@@PC_ ziFhKVzDmVvv3jhfN~KbmC}`&!o-0nwODs)r=;m#kQCUQ9q1wAi4kp0bv*Ee&#Jup* zFbA?yTbqxe3W;yng32Rf@$;-7DuEKLUt3{8fs0#44Qyog1uDkL?Y15qVP}(%^x8CHR@4YP55Oa%qr^xJH*+KNWTm2J8Ck)TQ9>Z+3 z4AKG(Dd-3+jxazmK`Bxr0wUg|SyuKO8^bSQW3uzJLkzQj`AUAh zqY>A$TpA+plmCkAvP#R~xB-e+NX|C{^`f19{aoZ#P6JUoGb{*$(+^>pbBWTP0V@^aCH zl$5IB7!iggD?nAYS3IK9N?x0NnYFB2%uIJ>UuTVXaaXoCdxO1^?X5r4+`kF;@I~$( zmTIU#=H@)v1EfWj?jdNfTDoq6uGmO)UAX$(h0?QDkh_VaxQT~t#SE1$nWuNBd&*66sk;+p=#-okO~=(y#bWoDfRjh2X-HO*38*;2_TSF%bV zMeCI(s9vL)aLw_r)Sd8h1f)WCmo~;1^2H*FNTH&Hl$bSPEmYi!{hVD;IZ{{XE0`y-JY1s$)!MyaZ@~IFsCY=Jmv`qYm=a%gquKMK#8z^K;Wej zMwIZH=RP4&qYmp~{eZx-rB7&8q$3Av0%HO^0oDi}C`$XsxrfytV8q8rQzV`M=>(+B zk~B+4)0o&g0X-Tx7QrT&fnF>43JyZY7f?|@X(FUTe*l~tA}ygJ^4Q)joR`AnBz2l1 z%MhY^;y`O_x72}7mK!~D02^d3WuTMs21A`w=eo3H@`xzO|Cs$8>_$UbEdxrGES-M*pgI7)h=f!nHL&}%3)B8H ztbAbJz=GlF;bj9Whu03QKf`c_X>7sd^yIR!mFJr-a7-zjlb%yHwbFZb?YMe=dfucE zug$9~=#y?LtE=o+TU(DL-x9#L7TFu>s-w61Hu$*DCznSpby38&)FLp~MWu)f!4q;m zq+>DJTCJ66zOWXl^8u+?SU&Iw_yjWPJ|Am=eFJyH<{AxdC!NR+4L#I@#)qA*srQ6g zCf=dZL_^O6LE~Q{(1OOd_^^QSeM@*K@Xn9V(Fnf>Hx|h~taWcYU!rNrLL>O;Kt?AB 
zbaM1`C>O&!n6dyF3DJR!CFC42igNyaM%H2+4IGqE&y)NVG*dEw>+AXfC=|WIZk!Qf zs+*@m_fvFgQ{7@9f~c;G%!)K6N!ICh^csLPI1zMPNH{PIx}QS+(}>+y{5^YU;rDA- z?yb|_yy_F?;vGN8ZOf;wPHZXvN89obI*y%s_HEa+Z(VlA+L3Z4A8uQ}=(5M2o;nR! zy3+5Mb=k#dwqAss$@d?9dG6fsBW*L;t{vwt{^0f9d(V5xedm1-BNE!d_9%1tB+RE6 zGJzV1Ei!@qIV31Hb#)Q7({R-hW2B;7%=WBVL%OTP_LwB`y|8Ad?}@MFh&fx4phd~& zZhXgtPvR?F8Y};cpOy30{#84BJ%A|r_v-gvzE9ofX46ehn+K9vt4SoLyaG! zReR^EOVrgsN@m{PzGjEr)(kKsEfB7EEM#hLQ!`YQW?5bhy(JXvtSb;I2h1BpUnxfj zEvbdlhTw+M_aoOK3eDj^@J9*1Pg;OtWl(bJKX6OIzC-Z_4J&9?PqdluPue!jQWh1Kd3(Fb({vvnYZ{yDDJLqHB;ONW>KmIK zUFJY%G1C*!C&p+s3?0b9NXi_}W*hpe*dFMILVD^h`$8B`%@Vq-7}_mkvqW!+*q6Xz zpsvzk8ITDtzgPXe%Fj~gsK}wf zWWjwR`>2_jde3D`*lO@sQ3v={9826H=0t+#gpJDJI$;`sl0Y}t$__|0EpU?vv||cD zxf3H#x6^&J`~TJ$;|z1;uUTI)vX^M-_tpPV`LpV)D*va6-DcWo;uo8)HSzJL>42?+ z)rRrP4NpuAkGO0wuja-=492f6lb|thnfe7{6sY&5&PhtM)Z49-ullXNrF~@-Ye8Pj z`Dl^mHfO7;nMwxR`R_EdhUnu_gjbC8s!nQthOruuKY{{sKjUNAJO(VWi+Hnikx{wG z+H5wOFVYNS47VCK7!vFh!`*>pyJOarkIS13O;ukhvYJHK~eQ$Rc2a5Q1we; zFe8)-D_A((ZZPG7yQZA!k2iE5Kv$zjjhRE`510vN?ZNChbF-JT8yd4uLis=6a7(6F zrVoFOzjh}e(Y5`>VaS@HxfQYvwloAhQ(0lZsp8v)vKm`-Lzb)n?@~A-i%#z#=LIewBCN>BbQr_9lo`m5*$(INglC5?~AS1$g(4&S3n6r(_ z$or4AH5zS4597GU51^uFERI_({h@6JN5%}`&_K9?n&JIzW_LXh1I063t8Z(0j%a;l zc~zB^4kXYFg!1xYJ{1Asv0F^5i&zmFo8!(Hp0p}Q9PZy(x$KLD%yyUAs5tTyg~f_B z9Z$u8U1x>4VXdsH3iK#K1T|HC=!0h0ht0^NXsoWoc3p=RLPcU2S}bpJg^UVRGwCI=A_e? z`@JKb%fFa1=)8BD*kvQ;EHCTf^Dk$EZ#na{9n&F8&X$Jo3Sl9RAinAEKsf{Z^gGRh zWJqirkHOtVreYpvNBKg@@Ml^DAU}*w06y)b3Y~*a&?2vgVL3~kzga)_mpFLm zvFWfl?)2!c`GS6fM)2S)6I4IepCP0pt z%TW(fvS2zI!+s00Ng|Q3mLCj-qETR9{{T|}ZufkFosZXqkO?(3-$v>lUVo#NTA}Q% z&)=DUJYPnw)2)_gfL&PP@pzwi`+R;aSB$+DTOH*c5q>5bq;ZPIKZSaVcPO*9>rcq0 zC0aoX9a;{>uF{izbWIVOgr2G;_6sx*v?60d*CT~(J5t|BAg>`>gmOU|2TekuUx?VK z14&J)Lv=+Ypiv4Q!E3I`5eVv&iyY~G0t_aJDPvkcw_(GI+oq0-HdM3@eCHiD0o+~k zy{pcDU{l~O^RUYXt?HQGjmira9s2!Zz2D(}Eu&tSEv8^FAbEX0OxAKa*XJ|{ScVdH zy%q|Ac!aDk^imMn@p$a&@}TH)y#7Y35M3VJ35pas7>;!7aWf`S}q0OH<8&NVD%F8A|wA_s_R zl$&-mr-UY;Pc9wXsahZj;2r%hI0k?G|=h_Cos2 zKFSts^g`m6%bu|bT z4regQ*9e`B(TTciLQsS#T88^1#l1@pB3(@=?A?{kERnWyt|`hd3Z?x;r#BCdOJn+tjI$vF z(HeFNTD_9xajBsIMRS#KFL4|3~C`C=!-b%6FB zmG6{O|6l%O_%Gkm_{8fno3iKLf6dYjH?3W|VcqEdeG=srspfvkN`A`yOP6ju@or_R z554Q4&#t=d*PrcaG#~v*x#sn)+g{oA`i|G`npN3+?_Dj`RV^J0#CyB9zjo*B%D(s9 z-LJZ;-@5;N>o+{UAA+c27BGRFe6D-Fo1lT=<)z(Y~ZvJ)wG_ zwUNKs+5P-Fl8rdO(8x<5FrnqMInc;Y-OL%BufTPs{nDWYqxIsYFF#T5Ab0%+$M+7t z0ZGLuEo*0vtpf1K+x@~Y5p>kDlkac17mH)om*#{i|6lZUwvjdIlS7)yTS*ZZ!bN(lMl@9-u!+#P!eQ`|Pr++ha;#pE* z4*$!Q7pk8Z-xud`SqwgahH@9A<2arC?GM>*d>g1&T#6+T0N@NpUgs^~o=^V#0QRbW zHt4b-Pq5nR$K-zx)G7UJK%L5c7ieGj_QvC18B*d}5sINPWGnFPaxKDwn$1x^I>_R| z>L6bdTpi>=Kqx`y51u##!1u&zIr%6bilY>7W(+V@-8&J<(}A#+^)GLTY(xEs+c%`R zd?KMJoqU7=?0Q@W5X)E|VTCQpW9yOgs9!){=P|`X!(~gm@o-Zzy(Gwlf(A9M`Iy6BjBjuCBSb9j&k^Xf7O2*!o1bU5F z7l?%1ySGpxJ#Dq9m|cr1Qlm6f^BIh=eO&-*x!{CEEP(%;`UjLTKWBI{at zl|O8F96I@(n)?O^(}0bS!dPoSYI3%i0Bf_^+k!-mEn3J1Y)eq+F0x+Z8tJ}2ut-`G z4DC-+)d)h6M-Cj;2^JCL|MJ_8%mLxE5PEHeFAv48vcHI@eR5u{$1>?TZdbPhHd+9A;rWn11fm2C-6b+0;#@9SGiKxIvh6Z_5XQ;FJmt@0%BvUj>(>+XbPE>rE(`|YwqdAGdyC&}m<%nkfU1z;zW{iT?srCiQ}5|6 z1^Ap{aeLuF&gb)#x7Mx0cdL}G{Axox;_AcO#zO^!UZB}r$`bGmg9Jj7OwU7eP$4eX zYfg>5sC=7HLk`ee-P=D{4(sc1I5aYoA*gN|5LYvj3d!VU;#Shdb)vF@^DCWqFIjrG z^Ge6q!KaNiv|qja`o#kW^rvh80;SBm3@dT%KrKV96TFCU#=WKJflHXVq*d!W?ic%% z$M^5Y^n6fR!YzhF!k0Tia~PL-M#>{?%cMC$;aobB2^~I+38D+b&CX>aatmUFdb8QT zOlB>I>Z;2#kuBzB&XMrCZP_Y~6mR2e!6W!RG!f@JdEd?@LOwWq=&)I{h7O-Kq6H8| z{VK{^3{694%^opw*6h*MnpRnsH5oqwyzzClTbC z#nxnf#JI>alJWcCE$!EqcZP_0XDA^79`a_m?+zWNTMzWA)3I|fH+qpxW-#Td0J6`JSi`J=KKOb_}bH{Eq#BIRm+`*PI)H 
zCMX%34GWwKQ$B%R!4^m%)4c7VGZh|M${#(9;yW_zkSwIHGpatX!*A2&-@O0UlCR#? ztI+jIbS6EQoxm@Yf=HtUdBNYX|0e8Skp-Y<17HFKK!SUsXifO zdezAVaveMWqPPG4(qG>(1Ru{HWXLFjPf=3J4Lh)c+BCn!Op0Ar?64)din2Y1vJdrn z=75=l9@)&Sz|?YRg*G_U;UP6y6rKR-=s2fp`qThVpk+E}AMJ<>^6_@P94gl!hXCIB zWs3za7&MNEa2*-hASVtihr7UgM}W;ewF#tQ@B@{6l^E% z)hfNJ4T%f#ikIx~*^k?ebmLZ8vHt4D0ZINQvsi&DDDG7PbCkGCP1y&%VU%6)CR-_{ zO?QmOFFJc}j>c6ad|HtR4T@G1d7bbHoy!=FW15>{jS|B3?}6)llughUb>2F}>zA#a zx8MEV$#&K69I+Dp-h25~cfs}7UfXuV4Q;&NMGsuWKg;a6`2LGCrPy!JhL80hY8oD2 z(tuQHA1h(gB;ee#A9}DJc^=y6>rd>nVwV>@cmkPmg8m(89VQ8VXCA)e7vE{)a~JPHMd{FrF`Bo+}GKLdLNt4L~1 z6Z!#_iPFyjEEM<`yOJAV%vO_NhHo*`j5Z#H3eZ!HTLHjkMg&zc=0{^uZgc{}1l*^_ zP&gP4gk@t~jU-u~2MY2Yp8x=Kcl&5_vVyBI!L{&DdrRhJzD> zGI*+Vc&Nf;iP>!jczi*=R0L*Sso0XA&r+^LJ}Y#j^NB4izf&O;bvRVlS(g_8aste4 zi4;LSwL1VXLjzL1nW)6j3#eHA--a}NSotK=vfupW`07^1Uuin?l)|3%*+UiIpNmqI zW!FCb^mR+y3_E((%$PAYdu#TxO!nhfmVC+#EE+5rdRm_)=U;eM$Ad4u_0}I>+4U~u ziHK|o`$ms!-ONV9t_Tt$tjicCMA%;?d*K^;eC-uTMm@fEGg%k#8Mqnfg>Q}RkMWo@ zMdC5mCF%fNOLFG{$hr(>p;HP=8?`)F*ynV`93M%ca7>XuLe8Vfmj(rw&fF{-G>T|^ zL#*${@h(dOfrhFmh)>x6opr7sh*rMNrDw;c zk#QZXEnLTq>c)DyF;f{0q|lp-2q6&$eicdQ$+K^_o7jXojP4I0M)2TYr1IK^eJ z4-{2oy6J@FBDN#JjDtKB1_9*B&v2&$KfHo`nc3q{8haT zR8Zz+tQR8o$l7OYBj8qdU%{GK<7L@5Sko2R-3W!=&H9KYYrH)By7+VUHK;SvZ2l?x z#PBw5a}4}!sH>6jt0L`5kAYD-nk`%)06*zZPx>kz- ziz>($2EPb=5$uowxXi+#psd=1Ay0S*KNlR_@ojE?9VjT_saCpBSbPXe>NUZ7 zr38@Sls3UWY5sulIqk!bTU-# zj2ed%nR!rkdgbMe>#qCDfyJT>F}c>4DDQLXgdu0x{0>vzX~UcQ^lk~PF1`A)TdzBH zz{GJ6ti5TSywTa57aP^97U^dfvF7rk`iiE&tZ}o)^qSz!n?A60_)vHysfSleN`Y}IM;ww2g2#$-)G--M zYBU&855jr$&}ORw=Ds-j4x5lw5|30zS|V~}WN;@s5P-*037#ENDSFLXR))DC$!14! zWCx#$iIcB+{iUfUKx@yeFeo;ADe%nSf-ibtTlpFA*tm`mj2)@ecs`6+3i$*m^a}fh zx|G6Qb>FeJQTiCj zJF?fX+poCbf?L1*jXuBUO7>AUFy|g-WNP*(dOC@julIB%u&Dd6x~@`mx50Oy$S50C9x0MCBz> zHaR*s%Ed!;P4GpU{E``u<>+Ql9GLyAp}xKm*ap>o8mH9&mh;)oRduyA2Na>Xd8pZ;Q2#$4(k-$Zz~8 zc4(MxX{Y8>W5{hV$EG zU1UGlLsrvBDZizA)V|yVXmR^Phm@=L4GJO-9n=jcANMUDT-+Ppc~ch^`4Ca z&LF1~(jsZbkkF5iZ6wtQF{1f!ImBF+3f3`#J!*OZw*3TVpdXpPn>KBdr)OGuZ}_D8 zWM6yb$n8q?Q`x4cvOnH_gnTF$B1W@a4ZtEP6F!l@aR-|$RZxtotkZdt6N*x0u>*b( z5J$}`+fc^K9AL^JY9k!w6!0~EgBhQe2;x(f{w5tjrxLT#OVzB}TX6vbk=0!$jqbg9 z7N5M;NL*8TJnf_7Z^L?`xDL2pPKxWm9f|0ei0i1_+!@z7JButL!91n=nLc4-fm)W( z@>sl%y;S!@9S7=2U1f+2s@g949lG8Yu=zYI*nD(v(Uy@fRS(3uDj3qnomEuhzYBmNe>W74vYOsDBX%G6chDM~zD(t+x7;rJ^ z^6XM3&E zLUUXieEgs?59=lbpVT64(b(vi!qvrc3zQpsI#?EKwD%15j5ZYwvkwXkjtnh2%YK&c z4F3f1U*w1K3ZPHK%Il>MWRsa1<`2}##<@lwH&z>4jFXM)jRxZy1D^~CSOhbxZmq#+E*5LK zSgc%&hkRVHi$(oAT1IlOSOkg$-i66X)|y3Fe|Y_!unf`oSv!SXBrc0)S!$6jmxK#F z0+ZAeq{ZxskyP*qOV1l%Q`3rO}h zLm)%=k|vHEJW9$VjBUu>`OO(B1}G=ZoNPiE3k$&I0J#a^S%R}~fTc(1_8zkQ0<04? 
z21>A8fmErp+SaUjZ2GPxe_lB2gX^Z;QJs1J$%Ts^*>vS)8jALL>9c9LS{xoA}gYTM{%5fG#O2sv4_-(XY6y|5qm z@~gYm)RAl|6f5Cx>~zQ`Fz#^XwV5v{##7hoAz7yLjgb@@sDEDicTNhu+svAyGD)H4 zE-56Kk&b7wQ;aslj;?cjCa;GK$sbzpmBT`E=;!zhaoCXL zq0jd&7N74e1PFZ=J{!YX!aof)mi*Jwk63?|nxDrY3P5Ira(Lb0{MtwEjl3) zFjL_Bk)>0y&c4Gsvj`t#y;jq+cx-%Z@pT2LuMihL)UVpJg{y z+6#i*2txaGA{5O-z;1wsl1nCQC_0cLVJST%PmNkFi6IWFh017TuCX4Vs41wkhQhY6 zwJ0i<3AoJ;YXNd3N5wLUie(bDl4}HiBb++X_)f2;i<;?9@Q1Ioe!}Mz)uL0POQTBk zlKkIVF2aHA0dxzHbyfNTEf#Rqlp;8ggZS!lBzIdaSR>f&2;wqJvu30VYCbI#u~AxKBwqm&Bm%-fAwQBx~MTGCf4&1u|_{bCa5G8gW2rX*t@ZPF?mXCX^fA-&WhT;9boc9O|HJcQF_@e zn~Qc?{cQW7_fGro*<2^WQhGL*bTTX7K50L$LP)w8i2NXhAQf^9k0j~tfW}%TbPY;o zKxU0}I&=-eMP}yn=nl=ll8mR|Q*CNKu5q;f`Qy{rB&nHnij$PBt$?E-T?;I9YPUoy zYd11JaM4jPjO$k+TO$I7Cwp>1)F?dC7Kud_Z ztR`pJV|OHM*1}vAlVB`GI5SJK1TCdAc$xD*!!;Ljlm8hThAtHdkYf}<#8IP#Kv+RP zfh`iULBhj{oLhZg>0JI;@JT;^WAB(VY8!j;%muITKPiXy?w!9XJu-Ceq+?#>N&o99 z+}#Se2*-)Lb4Y*IKm*MzNC|jO2vcOQXk5~$3+Y%`J_tAOYJI$L;9!P9N2rBs3|G%dMlF zr^2i|$)k);L65lR6O>+g25GtI54;EAn9Ii{1G>9VBr$kjr|4U=vI=#7pJd@m&Spq^rf(@GX6f-G`ktF6Mg=eg3kuG2ba$L`{n69aPk@Vi8Bf*U5 zc%QQMI7dqJBGQ~lB=`>rmXIevFv+}&i53<>H|9i&H=Qo-?o;XSxV!XbkS~%wBFfWO zL$)d9;`=9r4AFJs+#AvFPl)oPn$K%`J`t+e^;Szt^zt7M8Sm*VA%$zGl&-%OM8-~2=%p_IyGw)+5NL6Hz z@|FVjeZlbpKDl6Z0WZMGXP`Oh(3$G~~-IO)9lOw)50h>=*g zDQ{*D+%YjkE~#H#&qvo!uIKf6!gTAF!k~Zm0JAtLeL=>sh_O z^XoZ@E_M<~oSMvw$_}gzRq{xC+Uy>aV+ZSjxeKmavvs<~W10?N9t*Ry^X>3ccuqr7 z!E{TdYTyU~aupH4c7E|zqk+6{?F@(usN$%GC_;M+Qxg=!LOqfQ@ta84; zIY9xe&iH;-Pm+Oq1V?T2ZUbsTmyCfN0pSmPLUaE3=V3dmpQ9j`GHPeE@KX1?WRhP#mw zmSl?NPbRAidL(6uf$=1k48o2u=f&F5H?~kj3kOjQ1XvrO;cU~Y<>Z=RBpeKB;V@HO z=5nXSm=~2%CR8xgm6B5HNoshCZ0tC4Ys`a?Q2b3ND%%fY7ZUG~U`FHc76L;=LwDAo zLDE|QwtcGwNj$5d+rXn;WJ9P4^;VR6@8C5k9q9S&E_7OMw<5C-a`+H=)>;8tB?@^( z$BBI_w{219k&q@8RtG(j+;w%Lz_+z1pc{Cw^y=J5K!VLDP?$x(l5X3JyTT)y&zn=R zY+2jZt={sI*xTEOj;fffZd~u`H~m3A^(Lmy$=-ZZ=C+X);cP>D``{~6gUT+!TB(6P zu|rrYy~H}-$DCMgf~6LWw%wgETM^D=MiQ+ylU7t*Zz4f&!g6|wq^eL1Fm?sU|H&y@ zZIM#a4_et=!yMGW-=Zp0*c9Uwbsn3iEKrxSrHV-k1pQDPtoDe_Vf7`!Q&XuGGVHS0 zT(P^Vbqw~2#xx!-jQGmKeq$1bf7q6SH??M4Fuy3^8-#>+9f|{uBU=b>0Sz568x6vG z5X2n^g|Q+&LnDR8VV(6%0&aS!fIzZ!?`Wr4j`Coj>K_nxIL&VY8YX5x5mcZtq6m#X zMrhD5p~RWOuids)`F|NQJ}9@-y~S94%vJ)%sShPc z2A4K9*FT-BbZbZ6(A6yq%7YCJId|4`=uv+JJmV1Vk>^n26*Ec~otyI4TLi~BW{&lx5Sv0wx8uoI zMdavow3GRDKb5vXdiKzd*2Qq!>G_H#1Y_=YDId-_?9ut|9F2WF9ZSy)BPkATUhL|} zaAj0~@Gfa&PeUVvZdLEX?lDnp11B_cToCOZ(B*%XD|WN=J6ug{D;tgJqPAyeim!RP zrH$$PmN>KUYU*eHh}GcCk*(}qQSNZuExy;S+(F#Cezujk-d1tFABnS7Vb^iOAw}6Z z)lUAs^SF~ck?g%u90P?NT`6mNvO1F<#yakeb!U*S)1tr>hkj}>xfnJA1~5A%VK7KS z;TEDlw+DRFDf$V3QtS#Dv1afY9r6SX`V90u4G~4aJiuP*gb_wVPtZr>s7M7tLx-P5 z;}#dX-Sa8NIFRjcE(UXk;I5?k)Y9Xl7oQLsJctwF3zXq?g5vh ztS)FRa+|zK=Cs(8l3zh9z|r!kuB7C!;lXFfOR^xt19hi(C`UdA#kpF&1m`0Ws86bp z_woV0l3o;~B+5*m0ttZJb#_B`QFiaA*|yu>V6FrESzh+w$A8K0f1eNJdl2NkFWZ`Z zJo{vJ%BO77rei?<>VtI2AZyHilwJBH6ct$-fBY?@-S8!}kC2pLR1-iN8*B}( z248)!Inop#C=XPJx~HV(rIz_u#@44cq;{r$Ncp5xZ<&|+(}8rfB-Yz_filxSJup2w zGqy;%+<3Wtk!yvr!gqb(`sj+7w^3#ZugMw?0r8$SX=#f$=9$NrBG>pqNw%27X0L_a zYG#^wjCqQAo_VR+U`|Nzz;@_{;1Vc67Ds*>I{9I($qC_ZD5J?~2vgaTWSC;$2|Ua* ztT${h>@@sfPz_;h`mU6gl3l49JUx?oDP>5J{%do2>hsbL;p;-J(>J>5=`hhz@tRP9 zMB|`lr-LVuvJgsk4LJts5EdQ=p312SWOIHwQqbYW6~ihvgy-)W~(*WMaq@z@WDs-0q3Dm}%R@hL;ER8X^{^b}dEQPRBZ zHrl`KBA?1$L@UxR^4VJKqwU;YpX|~}2|NJ06WCei(>e@;4!InAvhGC(w{@Vgl?DMW za~cJs>;Xc2u`!wAZi2-ENOMs)q;>h;_Q8Dy_h~E78=L+4jl=)mbM*rQ)olAUcigh% zsz>it(w`uCZSk6E=X}H}piG)ra{BoNzV~+R{OB)xcD<1sfeOfBFH#zoYuEJgurn=Z z*~U9Buv}ofEN&bW8WtHAA8$O%Y$*1Z#C!RB#|QZhy8&#M*BES!*2N}8Z}Hv`zgynq zY4?2S|0wiH^uzd9wy*Mjvi+3jnQfeFnQEKjnwmE^I5#>aw%BW)Y@BQvV;kcdlQ%hd 
zR&-3vWR(4KxqG&4mdoJfQa+89!L7PEpd;_u4Cnwn0XRjvQeK6^Z6?*YH8-E+p6y=Z zUhP&4Zj(cUc#|BkMYW`eo4|JuzyTzZenYsyXbD%aNhH8^&oljTixmTlcV5zl6&BUk zn3!a8nGz=1)FbIbH3N+-Sj{oAfFs6s<3L&~wP}=5!cXfBsBP31`<|l()rLJ1U878q z&MrZV7j@O{y6RTwUyZE`@QA&K&@<{<>PUfxItB+w>jJ{4oKKq(a%jN*0kO%ECJG>6 zZ#T#9@8x9Xy=i~`!yVbL_y7B6X57|#^z^n>S3JM@hT9)8>{vVFwq>8cjMT*cVcI7Q zMu^&fU&o%uKY8J;7v4FDEn7MdQM;*FbA{4DtyerfIej(1+J7Tk#ji6hbFT8=n73Kk z?0i*u#pzfI0J&sixMymidbw+hVOw&SVRsTdb%8wUGzTgy^74FA3+5GAjA)$9^K3g& z3u+5dmd)k@6=vQ&T6xGCGXmp05M@$cz^Em(aH>9$h`MOpq(A}?c1r_F;PJFuBTv0< zszgQcSyVuc#iOI6JX&9*cP;g5ZS{O~CWwBi_zcnvzg@c+-b7v={TYxYBKCw;q^1e? zf%>qQ|3p)$yucnI_JX=v`TwHrJ>a9N(!cS0Zkyg`GBe3!GU?(}LR_*%9aFnX~%cwRpv@A79zOXWI*Pf0AC9 zTs6A9M-}wO+q&O)%c>dA{(jlZeJi)#0mpj6xR>7mMIG>T$ca8VW@OvGiXqHou(uai{sxxFW1aqJ>eOE^dI$oe~ z5ES$bIw+jtD+3fh;_@Dazw<>f3TL!TV#9&*Sb3IQEz3?gb7*q%aErVLUXAcRCT+?)pn*Odai69{fIfhNBZKe;CU=VPBB z04MvsK4W&TdU5y0d!Bnl&fB^qekJ}@{3oaa9+t+Ofu(a}nfEhC9hP-ud2qbMa-mr3*&9$Gl?wLXWY#1U z&hU8vg&#CzwZd=Q@f^pOcn_d(QENr?aEcL;bc18!De=5$(2)3S4`5uIeW#ibB-C4s zj;WIWsq&6JV_a6o0Rr||7ri*ws+j`_}>Xw zLM3X*lVb6)kdPI!7!V8sE%}h@`3XdV?7`4w_DhF;GATOh`l&^nGX){-I})0!AJsKaV(y9kQcMzLXQ>An^zosMj*+ z;vkQM;`r2zM?_Tkzh!7(UsR2W&K3NQz0hK3M_J?uuvd@FFB^p zW(E}o2Y({_Hl-+05d`GQ0-&J4^V4$~z<$bp|2L0^r;XZNyJXxKwJ#lvzY1)#-OFEI z-Mg%f-09)&@y{RnDO=n=b#AZQuWVWH_jmH(!Ly9@V8S_;Ti?(B#hhQGun z!9?^u^irUy?iE>u1fO3DwG2R|s{nGADoJ9hNyeH+U`LEyv6t-pnJZgZsq;^LLz=LR zX*EK-(oEl(zIwC#Onl19mGStB65vvnTDcw@CFvqO-fwpv7=ft?S%i2am8 zlJSE?pt#8|K(891rIyLvD3eSC3zC4O&?+NvoXG_O9!@-83An z*wMY9WyORU@u}>dVckbfj^76=l{9=~+=izp7p@n^u&LU#N_kLdV%mhz8vDkZHw<4t z^1%@gk2I?xbG8yq2?eu#(d@LM?9$Zk!5dP0cj{enlQcvgFHQH1k4`8aH*mhZ*t4*5 z@y&}yu8}v}H+$|%S(CA$Q}w_*ky-K?+kV@dws*4M%*{`=-ZU)Jo3mfs_wtRQ7X3T+ z+s{0jr-e6UHFh+#50{pfwF?V^tD_U&zhpIx*^jv44y_B#idtsI1(usanU*e{mDW?0 zsfOIItNQ2rM^|;}mvf|d`+ifqbu{J{8F);R!AK&6jBi>ANm7Gh>@CjrVORShqr<7U zv@1JUUM_HlE1@M{)#lOZLQ4xy6@p`L+0Xu#K;Y=lm#c}T>cxI!&LQGMWPr~vUiccg zzh|6ZJHO$8*Cb!T|KC)^knaG4;#`#b%E3S5Yg~l?y1mE4qmW8mepe>?4FO5{JiP}{ zxl&~Ez$dF+KG>Cqr?|2)YEg0(NCv<4fx@_wG{hLc4?ao3`_qOYECuVG!GvJM#hYzpb z_296cOIJ&otaXt!)$Vt*y~Y*Or}XWR-8x>=e&|gR<=~22Utr}d{%&aEpnF*5%>!Gv z%gO0`)A~MZ{?xm2mC@M0XrlP`?e}G-#GX|gvGZ*gjjCMOIlewNJ0q`1QKEgz*{w6X zwC>y*V3O&r7xeGPBX|c0j~Mny(*-963xb*9Z4Hep8_xvaarQ&sPM!Lxoh~1lJi7mi zgSXy%Seib6%-9Ljk8ZiQxK-{0G`@DkE2YUwC%WE4-sUMOeqUZbMgi*DanJp^t)usD zIXZp9*fH~^N7o!4dh5Xz{YFo^a-DLxZ|6>Qee?U%wYV(1ydC5|nxJ|buef|f?Sw0& z>8P?*F|YEL(erVaFY#^~Oe9=icw^+o^M38NGMq!0t=0e7m&!z?FMP>qknEABmw>g?0$P+*1enOc z{E_5xK=kDCf79 zva{?6+x4;Bbp~sxoqa{QKRbSwl+xO45oBn`uM}+s&>__E4>=q zCf@#4#hi8X`wm?7@Z|DiWO11z97g_$GklJ-2vfDr7IUt#!UX%rAxiNLv~_{y7K?1B z_&)3-uPFvK7vA(1i)=ukOsdUfHUqs&6SITu5%< zNBhOO-}J^ziy1the|ddTi&BTVmlbBWsT7qPi;1;~nvlVg5G9q@ue4A;5mRb#$d00;7Z`f- z-)L9;XiaRJ>E(-k@SC_SwhjBCM>yw`ZPcY4j!ml(kJMaUsYc(nCebgb4_wZO3F#jwBXx^S6YnMBDCcY5 ztEs11hU(;fm^>G~h>-0=vKTL@c@Mv}MHA)QuilCA!_TF5QU(a(UR29)Hu`O5jm+v_ zU`uMmHN5ut>iSOd?Ys|(2NG?;Q_(-2vf_5m3v3vgas}&r&Mn`8Tc{--ej2spH5M&Z-!k`#x{jRYkeCTr-kS%qpe?exkkK!%GH6V&|J?fUgkZ3>X0y-1F@n_|t z8k9+JjCe18JYU}lts$x;IFrsKG(!59sT{r<$It9zokMw3h$_^3s8y#55yi#mtwfwD z4Z7MO7P1qu#WjYWS6-{(H;`{frPSw~xA`3sbV=|x=S@x*>4?sA(szkg6Ipm`r0=da zuoGxag6l9II3Mvd^_HN;W#}LJkUG^|ZIA}VSJ%WAqan^3wnRE|^-esgK88^{^(dX& z$SvULB)gbi=5ztW=sZ9)BoyQ16-a{0`H+1aFG%nrXKh_hNIH+<8fT+HYjHi7F3R=t?YQTa*T6)aPjy+6XeXX4U#A2gqv$$LvYG_VFrE_}(%4SU zPkJ|Uj^dRk2fcA|wKIe!Ch)x=lF7PAux=Az3WX6~$!j)0_KF@D#?NKTCX zq%6@#bN#DHiNc38JzA6Gp)07JU>!aNiSw0D^kE3zZybjbwKH)Sou0Tz=YC9pxLUv0 z)i`e^TS8P7`Tefmsgsj)NPU(IDl6N~D74=Iw6 zsl=~I7D)2zuj^>W1;3(lqWO)arU#%Gk(?yDtWz8FmiCm!+nlV_y1PvV@K=FI^h9;bmXCRQ{4amn@-Ue4RX- 
z&iLnV|4kiqU7}_~&!zOYq}lt@CG^L?_+P#kDjL4~f2MYl){BMZ!YW~%unET6ZNmM+ zLx}wO4M3`&6KaH7VW04d@S5<3@V4--@V@Y&@UieYy!O8mz7@VlCZV5%UxcedgWg~5 zK=*53)JNBTN5^n{;;(=A`@j4Czqu~=ntReQ)tCP-Uc?{&s(t={aV}l|@6MmJWXWax z=PYrBm#;j@5AYQo^D=em)t&zz&m@Lxtg$`~3^`7}pME=D#(kQaqTl+zI48l1%^6QG z(|dv+6F+heZG3Aw>(}S~yKDa4`GU|IFrIfRCm>n6z-HH7=q2yQdxafXmpvvt2_KQ&!i&Po!U5rT!t25j;VAN4eIR^< z=sWWB{8~6GoD=>|GAr8$xSXaJ3F5NsYhDN<0=r%N%J${{(|09hepHr!&CBpF&Qows zyKBBzaGZ&kp{%KH56aosy#7^v|Lu9oiDYkGDQG1B66#g{f60=oSxrZrV;Xtva^ny6 z4qUT*qM`Jed4CGm|*_4g%g-cr=j_}QGQMHuhG8bA6DjDv7Wy{+Cwv$2eac=X;1w?dVxvszl)(ke2so1 z&BoAxym+t0mqE8LhP~!hWfSZHFuUMTd<%}Xo36~mB`kvIuy>UW^c(OjBFb}DhA10i zMX@5f4!^ysOxJ%?UyXH)byTKbnTMKwbscjt^XZ&$-*F^oi9B)Y%;WRp@xA-v@%hJRR=qIp#QpbwH0}lI%ADhK@EsHP&HZpr z-_d(U-+$u7{rEuL)u6y&={TMe8WyQ=$c&}q1EIod>3BS4@ccU}2hX1`9j`AE>*f!x zyn_yLeoI`rTc$Oq3-f#OT3k%dIMf;gZ)w7+P~K8r6?<*ghx5^hy-b|{;mnaW<376o zfse*Y*4q!tD#T$p+`iaKM1VR|0g0y zr9=(=zO2cLk)kv!Bk&qlWIdy+x-GsbehL3Jvia~;FtT~0;;)}R4G*A-)2CTQtWz{A zv$!}jE6UC$OR;;e8?a+X|BANTZW*^#RB`PlHV;>t*gVwr3$8*9f1w)K1pV6NmC^WJ zIa{{mj2S;3f4{cS& zwkCri7mEuRkXb&%J5+KCA!~>Iu6C?hKS*o)PTV<+WtGNH_X_8UNCo+#X9-L15dV7I z)@glb?HCyUtc0cC(9IIx!v>me=pH{)%yK3?GD^&ee|Y?O{5UH*ew?*Zhs0|P6*sa_ zS^V^v-eaq}|L)QFpWCw3o*k_58g`T8x-RkWN?3UC6H~iS-8qC%Svmp8`$a&G=#mNC!aH<)MHU)X!`+d9R2v3;G`)>T*UTz_TbrpxO;h<_j*UwC@a z8}W0@^Twjn3vvIM@hi$Ybuc{qDDK$rF-YiA+Ah@NQ)zFSv3$?3ZZ99o`Avtr;}33s zVB=-FH7ohN|1dT=_H7{2tO@F1XKbQF>>fkh);;SlZ$bk;Wo^?WDgH}YZ11m{0g2WT zEU3!w!v|SF1LWjZ49vLx%yw7TXQ{1bo{QmT3udeMrw)&Q;ZM$zZ<>r3;(U>tSNA~J8=(pvSD{(*gPAf*- z4~{jNnecL_-JRr3XlICab9D2i0Y4_~n0fhm`QW=0zudcikMYU!nv}oHoIa?o9t3~_^9h1%gW9w`NK?Sc*&f{=ik1wq{oc~a~^$k z#_(}16yqbeFZfNDlH6fC4|FNWK&udyAg@5HLTDX082^%1Pk&=8$c!qE%b5G*!cgmi z?K5t>sY_mF=gJw=?;E!4i9VeI^WXn$S7DENooC+HzvJB5od(`JX~Y|kzQ1M)YS-ZR zassuxh{kvbRAk^cAwPf7G<@ZKqk2EReo$KUrUj{wOxQVJe*ga6f0)$&)ZQ(n-52c4 zetqsokD`{`hR>zXl}e!ZV@H6t(7-Ry2smUIyxbEVMKomJ#$W{d(C% zd-wSEGeEq-BWi#=M&60Ys6#-BlI}w!6`TtQYmB?;!e5tmIg_3&C^Y)(Ue@g7hBcEw zu!NM?5L87$d-y9$e4<#2cCQC+Lx8utFK>50_9fc=gyxXM5WMNR;nk~T3CIUF+&3Njx^^J`$L*wmrzprkHi9RSPM#q@TP?dAPpSwV0o0k5nprCHHy~)sGE;uUPK$V1UJ-?U6I37J z@XyE)79)gaaM=l=QU_>X7@>K30NDl$T?H?X@XF(wd` zbYFG&eh$|Pp(LEi6prqbcqU%92>9rb19L&)rOc(0cUZ)*ja zLWD(a)q)&h5pk_&ZOY1`yQ6ovU0G%frKeY9S)6!Gh=a;-H{vL|i1aRH&wUm4l zz$VQOdj=fm=3`Vk!2!FFlj8T7jwb+`wkurR68rYQ(_AfEhHn$nFQ>jB z*jw5AQaWz3WHR zccByy?3q`N9^LcS(LcO;_uY4I-FP=L#E62U;jcjR`cOa|ys%Wu7|?l_UF6whV%esPi3!cpL}Wwlfkj~f_X;pusKaL6kV0+GO$z^=faz@dN| z*qQ$f;E>@`L-}6dM?eTI@F0khCt_6Rp$;AF&dx?>=XG*|KAv{T4K2|C-x^9|?xR_< ztjvHN@MNwGpX0^FGsiAj@FY+|yG@xnXVkO-m&Qd?z2OXppZ)fG_V(!=D`$*~|ME@z zmf?k&y&u1M&ho#E6APcIZr3F%nA-A1dgdn0A3GtW`+K1Ka{a%r6yVE%@%4X|2%C8o z`<*yQx)XY?ZtWTrZzJ6qZ^Pb4Elt1i{S;VEQIm)>*sAzZ)=odKZ~Q1OZKlOvqr;2S zZfnxof32%iOJ}iX#fj1`uKm)Lx&s9m2)cixrp~5K-nMP>y&V@+u@E1)J(mMVoX~m(NiD7Qv^P0RDm)7)B^D z$?9ot((9+#k{jjiq^7kbKfJNk0xr`&acS>OiCl zrz7R_;BB-zn0P%R>YHwgd+PD*M7N#^x}DOWiSkSs93UDRnIIZH6R+Qp)mZ}fj1gPFV{eFb}q2KRQ;R5c*%HEx(_7w()+S@%!=q$ty5Z|1`zs620~M>3Q_} zKjV?y;32WFpolOIkRLqxI7F`*@sY{_j?ePFnL1xWpR5rAM_Nv~aP zfxW97p_t9mid(w#QVR<5T6J=lruK1nOC9PM>7L-2>b92UmqokfcZ*i$S4MBi9~U*M zZ46t1Rw)((1`1B8xRh=6#kQ; zEQhOmi;%~x9u!By4b!USm`rS$D5j@`(n{dAoEr|eji682bGz~h+v2^B4*V4zPSEtMAx2FJhC@VK7B|_R=3ZQ@R zo<%jKz~xsRlEPhl>3em^Ybo!ioJ?s*kuy>%QU;_*DZw_S2bh_I=m6x0YiU4NCP>KS z5q(F|wm)-6!E+s=ld*O?J{Nsy*i+(%m>xWL9*zxSb6~B4t z&Xtd1Yk1zQJ98`ZXIz<-`t;Z>FK=RbIXQdqqn9=+z_dJp1|k#4%4m z^6I%?ptKSPvlo&Q!w!27IMSXR2%Ll0fpPSlNPQCL7vX%8YCeUBOZ%Hnr&$6P809JG zT-a}6>H#4LpW7mjuQVEDE=(A*8(AkMkKd?@US<))K>on;FN}159-}NeJVxt&u}U*J zB6hpUf$R$8U`vjJl-6Aj0y1%cU!DcSLtSWM)On_=Xs|jc27x+NrCB_RJL2K~iBNtU 
zbBXJ)+@u%l$saw%bUluB=yVh=X&z!BPgNiiWRc*&;Mm}-piGfOdhZv)Rp$({!t*2U zw!@Juk@=MasKXNxK0MfesQPN7v zOB)>Q5TMHYQ*$+cX?f|MwDW1=th8Ned(s-x_@#nv>~AnO*8XY32g%wk3a{PRG~`h`v?8$=#~9RW|1?>9 z)7i(+mxZ(gzjN5+jQB27LD06jMTA$o<4hqLim9xh|d3zT7c+e{jcjfr1c$L z=1&^9OgV4`N}-5d?3=|9s3pP6?kjCbfF~Il(73oc88{y>2!V{itUz^OCtvTJ416!2 zLPk|fMt}*+nBY}YJZ4jOyTeK#+OnMDQM*T6%mr-V6znae^w6tU5hItxm1Zi^g4lCr`%( zzHNt$5Q8}?3e2s1ZabVDbzZ-b5L~=XoC_b9$Ky94CYADcd0DPjC>y~X#)vE8?qn!2 zwsrNitTmbq9;4li#5%G=%0NioIuU-Iqyc4V7Q1Q=c^r+2zA~F9`oip^4naN~5|_x= z6BR2&nTz9~#a{{lbaA=ZfN!=SzZp_>IUTt+#I;`0x|000b0m8^pXls>F2pxb{}Fnp z2jdxu=ytBTY_YQpMB)vwOZK48Xz+QBlsKmALd|M%16@CjSzTtBJRsIqAmow&nv!-J zM1=#Ssr!U(-^>mo!~|bolaP!&>}kD>bV&)&3+f_IB0CxFVw9I*s-vxqZ)U&u$)Qv4 z#`|TJW>&PWn|EKYMAov)2Y1*j*UO`>?mDuEbdUR>tq$TiMP&Xnupb#jg^>OtOGMa> z$M6DEBIBEKywB-0rXrZti_yp5z$c= zHu8X=BFvSBFXlzP0NvrQT3`CgFR^YCb`oS0T!wSis3B^sw(V2iQV!X^R=+l#vR#pX zR&5ow5%O?liabSmKz&*@C~BGThj6KgK$v>-`;O zQ+jq7i8@yx(fJ%Q5|I@IpGb;hN67%gh_SQaZ{Mc4Jwq%P!rn1b3!%BgiME6?b{kx4 z>)5kr5%u!gm+{Az%X#sEY)^cCY`mDUC_XaL^K#2(2vt9_!F-)!K@T!yYe0t^d52Sf z1N(6#dx4)l0NtswA7La(*~~ zt&MdH>%dFja9p{Y^t1Vj(Y}ufnIUGqK1gSlfo4y_a$E&lRJu^FjqYS9wN#{BZ<*qm zp0Ze)r_M3VOW7nnBv}mk2#k|#F_lFe( z^8#Vb6}Fl}=|)Es5$nD@6UTf2KOW~Z916M_8U@NM4z-pz%gf{EP0TbDDPtq)T(8A? zN7rk4A+a>laU$>y1+($Rksh{3=jb4Te4X81tty7cuA>QB-L-9)&=h#^g9oe_>4C=GTCo(g;&RD7Z1 zuK#hLcFSwrk2s!pJnMedp{Q;pG{?QcQSDMNid0ibcKFQBP^!V7g7|r_6lCtUp&`ZR3 z%y-Hs`}&6worYS zCuy8=Of&UE=zeW!bigk6IlCR`Dq*(yxPG z77(^-5n}*oHeI<)+Evm*r~!7OR1tu53A>7prABrp{l1^#zJ%`O6$fUI&7PHAoxKI& zpOnp2WKC8&9o&Zi zi}O~C-rc)nKdg>z0&xW^(5XB?77N1$At+Gyv-5b;MQtPs(MWYfbh1K(k*8XfLW`=V z98Hywl1%X#OG!kU_`A`hy6iTyY$8j8K{kdRB^C-!CTWaRzN*EV<q2Bm_&1`Q*Fg*YV4*Pvkp2m=I|PNGqbLcK0X!hETVV+d#(<2B*e!Z11>187G7 z41eH8-E<^4j{<+KX)G6lQXFaQ41!BbqO&Q=5rzW12&W3b7=&C!DsfX%Hil&>tx21q zi@}PZuOt#ER`;w~vwC%1UETA~=W3B?YOgL+=ORVqYUWMgip76e{n3!?3_G1Y=dGtv zuH!@5F=BM2s*l+dNxns2xxj}&{Y#pvalVcKMXOUCgX&}92k>nKd9ex7N2?rMDg<-+ zb-_mSq>jVL@1(xbc&AVJow&WV@fJzB;PX?C;IulQqsiW?-F58`lacJu>;IJ3--7JhjmG_C`y#2hVPV>f5g>(r zLZE6e2{Ylb_)sMaE3_2J;4tFRo zBe_CF;UBt{CWEgOGOWa)`1ZV(K{H(>gGDk1W{6`mW@S`oNEuZ*zC1Tq9yrz-Epii} zYrLQ<5BP@g^f(NgZWhBgx)wnv@1t8p%nap2d+~tos_X>bl=DK<1~x|m(H9yNz<#c3 zb_qgs5%mx4q68~p96>zbe4$FYX04Yax@b|s9gTQN1R#u;mdNubj9VCr|McVGUN7GM zdEMdu=L0vkyb)PayDXT{0~uga*+6MP{Y$SM82u)5Oa1F#d_5koBbl-)ezW``)&jX~ z&dXMqqD%9scT6y#!A6);3Ji|(ng*OQvo8qxl)1aP?+C398CtpvBO>xK8I|s4M|MEX zby~t9kHru%YQ|XrO%c1>Y?$D(fS%}lFq;y71aou9?v<~Ra*)H#X*q6M&Vd~gT1gIH zSL0a6EJPupmE?Fy%HSBv2nmxM-k1E?#+8#Cb|{DC^yh8&kufd*e7-msV{Dyov zKQ-5%Tq#gNUn%(c-TWN60^eTL^%6hVOMq~N+u+-TUgAfJ?uh@G{}iA}`seZNqs^q& zxqo3rLs&Lt=ZUtZu1zfCseie4La+p+W?pOYXh#-rxVbd;F<(PU`?0w1J8ItKad)H| zmz-GkVyC*)+bTzHF0M--%D{hU|Zn|Ue*e#314oJZb*%TB6gPz?1 zUPZL#7pFsW;1cGVG~_P}Xw9pU18_~b&0$8;=HPIPW=Nc-%}d(hxi-__3HY*;ru45QPI|=Zru(HO zB_jBF1I$80u`aO-FKhhVRRX<)tG! 
z$@kA)-rN0%dBDwA7w}ae?GYQTVmV?m;H?IX+e)#wFkzpZPvnBHRs+vKWQ8L()ca2}3Hi=^aTmU)OS*U0FDZ)h&zE2_$ElPvgcgA4DZa;SSPjm& zjrZ(b%)t%|?<6W~DGLKDJ9WBV5{35Hg+kZMLgm29^OaJxa#m$^rBo@2k?X6k7pt#l z*9%c>T9+X~9o1ZhY$;liJg^&{;if?YE35kqD6Q1Sj>TL1_N7AQ*vie7$13H`s7tJ@ zZeLMaNrTi>)Vj2?x**DrYV%vrXLW6Mj((hx#g8>dIE-q10h|i_-^$Q_rU|J5=NCi+ z{>LqGZytY>#BbSfJR@uaz(plI#x43YiraMV6$LNyvqRBuw!MS%p=+z}@X@j>aX#iv z+Rv3(|6J_n;-gUmAR1OS98u2k`tM25@Lkl8-6N?q*tTb()GXaFr}>J87v=H1{>KvM z1MOF@zbHLS^*3d!!3n|vX*gR8=`IO5$&BmDPN_38U?BZ=GW~Z988EQiCE!Dt7Z1EA z4W}#?|0FrOiW||k0A-#3?Y^`iL;;R;w7lN$WLxW`@m z9<+NFG=c%R`jWf>(d*hf+{N!e=X(tNd~6#etEcn1^i%(W{?n!3-H?7^p-{)>gleSM zos?g&Jdpf*F$BSXTGwDW)%ClQmZrep^vYX}T{-F}k9;)a%MKta6-t zyPjdSX%~^&d9BpziijRT2x$&jG;DU{rf{SZHL$8oJRP^eY?RKjk?YX=c(rj@;Ju9n1rKX`|K3*bt)?> zVSPr6(~GKlzAj&Sxah`~cdvR9{M7Il`CaiHWeoTUSu`22^m#UwuH5Y`kx4}dtjQ41 z%I|L9PCS^_a7OM3iCoU2+7U~RM=l6-bXT^XQ8cdg+7`yo!zVL8YkM~HN~RLbWb;gG zO=4+TSvZ_g7%6GnHZyZf^+?KdH5tqwZ1>DvI+TB~ z-@dXJ<CVgY7tMN z)AS8C+mIs%KxN#S8@3AC4#&PY0kHr&03NDX@8spvTUluiOc{;-Y2^N$!eB6tf0J3= zPrG^2v)#JS@3?hT?=AQDcydek!;ud@kaM42=4YnYfBQ>nt!?|Xd3S}_u=qQv;>7J+ zR;2_B+CN!Q)VgZ-tPi$l%-cSq-{CJ_do^wLJ@<|3xMuql+Ho1t@TI&&?#Zz>pU?*G zm05TH2ziiwq;rICkbjyy*8D|}H)Rz#^II20_9LZadTNV; zl>Cx{)YOhVpaykIx4+)Dw6s<0b{*sQ zEbm^{v0HcaM;3A(uHgNV1q`VhgnsNM&7a{t=@p{`rsPd*xj65RmK*cdv@~4PF}`LU z6AEPpTD9t!o!v+8oQ~0s6@ZZK*jA$qhb`D4*xl^t1t2|U29zTa!0Czdcmw^I$ubZy z;N?jYeNkV9Pg?F{V|+7xB7gJU14qnTY6Fd(*&Wrm+Vak{+jctJ73Ai3EhwW7O^bFZ zD9rC!5KSGxJ9I#QXF+O~xu6({4k#g_U$*Dl#w*?eCoH6*kEcfLzeIQWh2zZ}tSbTW zMClPp@HFpe=NZVMWM3yUA~q&#CB*w?-A)qf*Ih5nN#sQ2eVT1(*S4&b%c1g;2$1nP z14P(=&k`@6v+%~UB?mN!s-8SzZn%>DhlsBa?jQnANAlOzF0X!zcP@4tRAze&N;$tK&~Z=Z=^#ZRo7x zg3U7)FP=>7f<$I~E_+ODFVge;g{XmsadbbF#cpUvgW zALE$mSdL9{+3}o>&J8*Intz~Q^83?W%8_cB5pxk%qYMV4i_=*~oy+5O13vSXS{Gab zb)j$r356)Txv7bj_$%dbg0^zcAdlD9yGNGxEVfsII zEhbQa5&lXrH1#7*t$aRka~KOC@&5ON|BpPy@DqV^@oF*mLY6cuF(Ev&V(pVO*8BX6 z2?2)5?1cj{4{vThA?PE#O<8h;>k|Sr&eO3^*`bD+(2F~2KJ_}obfh2Ii41O< zuMMZ6OpL8Ho1{;5jo8(}d4Z=ILh?d#QNyIDuZ4K(rFMTyco`n5nRnY=d4pR_BdvBC z_B}%IOY#VPG_$dvU~}3cNYg_ZSHq&Kqo<#+Tz71c#P(jLz;W3lOf{dj1ieUGo?LjH zx!4(4iq3e|1%UTKgKSgmZ)ASHX~o6tN7)%0mfZC~ML$bX;Zw`)?zZx6J^S5mohSG0 zD$?5FOZi!G0M>*lh_Z%?(D0r7C-HSe(Ghl#1&ELB-Sk%=1C`r;9`Ym5MgCMHDZRGqt6IGaQ0(i`-JRKnrvoJxs4zyl>x zBl-gydGHVOSZ8cjoY=iv`6~YXA^cHm;4P($g#Fh5i;SB+x0l%|t_WLYNSvh?^l%z{ zLk@^=3LyjC1hVQOD<9q!{vP!fdnXwuo5p(AOFJD~yhoL{)I;9$UaL_V&W4Huy^}3_ zygylgvi)Q?$yV7W+a(7!J&~2~GpA@Vn-DBxwb_lT!C*z%s9JSx;1VUI0sweKwt8^} zmJ@V$qgk?*ak|$FF451;{Q-Z|kB_D{x{`_3*p~pUDf8Wt7BUBRm+nOJ~phR2j)p$2? 
z1iSC>ZFg;YkOks@#cHA9DfIrw=zT&M`Ce;pCG1zx@tI9jl`z~o4A_GTgPvyOEf$Qn zLH0pgK*6&PYld}z^?mC}t76scu2L(}7pa6wcY+z*l1AS8jE0$rf)H`C1*hF)x z3j&V`ZMlUG+Xd*sRND9#@_26olU4%&JC1dW9p*mlb0fTs?{)*vmv~L!4oSj6mLfRW zh8IVYv`kO~ACL@ub$8)RVYCqafaM(T2uk9n??*wE)@iKo=+UkoppS@f_sBW*k4lC0 zA4#h(ALA6;9j|ykZrd$Co}|^&pw%}ztys|k!G$MmjXi`Wn#oewv-c9Hf&K0u#U0rvPuK3%n7n7*#0yiJ;a}Zhm3Kj!h()>p8iz@$}`Gv z5l|_~L|iK++Nkhw#XEjOWSqKL71h*%u4GV|?0PU>Q+NCO;EU3J5Y!}*-lNM@41l>9C6N2}zh&Xfsqb9dq@oR(nc z!c3UU=|zsioY+W>cjYjghGWY(rDMMpM748qS;1xzUzl|kyDE!Z?ZE5i{U(T@RYyiBB)iT}oc)D-Br3vk+q{A02xfil<@ zzJ@aGIaRo~AR+{bHhlX+;OQR(zE3Lfs>c9VXE`8!K^{@~v$2Udjb3S-2_ct>I667t zhIS4K0$3GMAo28x=f2V-@w`&QQ}EXLpT_JZc5SHQFuTk?#qV*XyV89g|Bd!u?q1#- z1H+VIhM|_hjuEbr?kZoUe}nr$@m|>vkXnGQf0mXiCexFo4oVxeCB#B!gPhrdL>2CI z>;P4E1@{ESYV3XT^d;Ec;Vj3u*TG;vLe7KVXlDtd4kd0$$MMg60zl!C){eME&s}1s zc>+FA*eVjt6xAv4J)i{PdQqP^gbakjOG4{(qFFMv2@&ZWMl5~euiv>Smu))T2qQN^ zxqq9Zghbx3|CpmBz78Drzho(Sh!1cs3xO(e5E*2g?7h84Q%6be1P7*XYu)}*c*A|G z*-?;#c$4T2KYdag$fTb>(ORM)x4@e_PcAUFG)LX?WHwoyZ23~92wO=t8cZetORFZV zPA+OKtst2c5t$bbCP97D1nIC;x(Hs0x-8l=VuPP$go#?O5ann@P0(3N>FQw z-0&uVRAUHO;Jzmc_Cbz8Td-6(YYS3`jkV1}4h!mW8;t`yjRV^l+aB8?n{2~!H=voU zra>A4T}1PK_A-)?STGYxg9eD3fd)$7AjCD1(1*(*0SN)nuUUL>M7ZGKvp)2+8((s% zm?by7_;jEUdRn40$-9CK7yRQJ+Qpcyj|W<-U`TND8X%J)na@@6`O!!dB=_0lB|GNC zb@bL}j~|m^h$cg$)nt_qHe7(#a)I&LkQ?Hv;J~bzOj*GWM!YcczkD&+0sJ(an2P^F z?3BPy703#RY@jhHy%>A<2bMWRyJ0j7osR9hT1sp(L+Tgrv1;DjEv%-#7SLXelAmVD zCt<-zhcTf}>%P;qGff=EZniCQ&r4k!sL57GDFaP2mGP$eCMDO@$KA)*GvwkzEb1D7X<)y& zK(pCBd6wP|a=e8TF5jM|8Nsg70LQVO%?t&2*iZ$?P7d)$p1j-%6NO(iHw|E?rv_Ho zso4=^UKT^B`sMUsp41zjc{&)(g&bcLIt!5k2dWiuR$|&AmO^YS`SMU`gf2TMO$Tg@ z?J{#*NQqO@R(AWG3a0%s9E%8E!jTUeREac=Y4KybSk8OzGDl7P7}H{JvGG@rzJH00 ziye6r+_5{(ZhM<`-uHC;txw`xNm)?zI5jMbJ>OW*ulB3mV(y4DJSH)x78714Syo z+Dp-7SAS6g#w~4R>VC8`2O&q#^7&hLYUe-m9d{>|x}wnCnegLDQV_H9nJ8a0P)ZJX zvGY|!Mdx@byc`-A4&)gtHCsGvs2kjD^+4Y0jVlQ9cE#vGM1-g!Dv#DXltcI|KhDK^ zM{J1W_Yr(9Hw%B!ISJ z83m)v5Vj#5q(DQ_i3(!HaYx3sKt2bd834)0&_uw zCjmt-Oz+@VC_%!UD$>exGZ|^*BG<;Rz9>cGZ`_!fR!Um!tJBM}Od z7jpssy#@arsx3VDp*14B&FnRq%%}kF;vzGe1<`1xv>68x3vJ?VH<;7tK!KwO&$}Zi z#khgejM9ZkQj-HWnLHY2gTkiipzxo`JqN@l-)$l^J|w5u{=@jHwrQEI;59_|wypYuWxh%#vKOK`gew$oiV22Xp@b?U zG@4Ty4@!)fSR|2@MeYRs9}a++*%Lw;-ZV7JV~L9x3u1lJ?n$?QLS6=?o+z)_uKa}O zdlWQ9woW`BkZW)&Y&Xh}gT@x&Q?2vQCRQ$$n?#>{ul%HZLX;6OU`j#ajXbfHsh!wS zsxbAE%u}SPrUlXh(|+lI>7sbqWJLI-h&dh!Rl&kkAea-B;2Psa-dcpXP@Kv|t(<-r zx$-WC%_gW-$W3NK%z`Lm@esC%@IOV!CcdHY1q(8)d==q7r)_F&hO<_r+PhfAvs*hm zCw2*;&454sI)fHAN`pdPBn^M|9LmM!%Ve}KSjJeHys-WpyAZcZov;3bos4Iq?_)p2 z>UO^)Mp^s~;=+W7EGPtw9~|?2X-+YCqMF156MwkR3M-&INfb5Gvd4TV$s}y^r=2EHD{J8my`J&liRH~dF50rCJ1u}`- zXmCYTxWir5*~0Bku^?Hrx4pE1;AakCRZr}WG-l&J5wM$5WdVaM5n+x5JAj*<%u{7?9o>V ze&pBDCx67Pe#1Ra$(n_cMeBQABI5vMQvML}I2%6xJbet3DwLDYol^lf<_NCkhfkb5 z{6Am45nF#azBm4%axgyc@2|VzW%r`(PmHm-@o}+QbG#Efq9Vm1geMbj0kt0@Jr6q$ zBNJNQaCw+=vu&hzuy2@uSU}hKEdQ;(mlFP08s7i4`bNuv=IWa{p}xT?^6#`ZK8&3n zW}iAHgJt~xptkWu@nh4OkRC}F)1MDxxHPVUb)~JzNN|nPM*9=LQr0kd{wqc8mj9%r z{bvOYV@bw@U&eh+Fn_|+?QTS!hJ_ZougO1UBUa1^6@~Abk(~D7E7#~X!fVid0z+O{b^Mf#4;7Xyj&=`Q4X<8^Y+n`VwZBw5HO z?KnR_6(!_QUn2;e$%_x6x4I8Y!t2Iopdj1Av;%?xV;Np%+_q=Q$}NpTQDq}Q9Sp6F;qARWy*E21Ii?1u zrccOQXjv3ol)k~TDR{rl*22nq77WjQ#`ru|u1>SP^cCB?Hc>F-Oh(j<%@=YO9V!xw z(n5xukjKD07SV3=`B)&Q!aRW1w&GyTl%LZYA)w+ACrLCyfkNSs=A|5hPK0hboe`(x zY|}b}=N7|4ipHAAV`JcKkLxg-z)zXhEg~FJ@y>I#;@d=g8uAz;nHVJ!lg0k6nhgWY z7kWm)b65*wyPm6)B+K*-d`d0|FH@R0Fm2>QW&ws$-&!*es-2jfsLXqxjKDOYfQX$ja1pvD3<$XI@+S?S{2a zcwC=6`fU6sX+ibEajVBIxGz5A`Nvm2w`}s01pX)l!8PB)|IElzYd3@ANe#3(kaAPx zG6uPOwL$Lh-#g0^H1hCa>-R#K5$#RhzUJ#&rWA8?GP<~Ei5rLf^6Rcve!|XxZ 
zIf~JY)F?=~EjOq_1Hun;jRv<1w*Vhu@+NW{_{|J3YVdF4JQN8(U(!?M3Q)jK8O|GI z&A|46=WwFHp*pI~Tg-|^iR&8)W=a)nPFSjAx?F#;m6|my{@wxlty!uz9?pHsHPhs~H9eW}k zY(D1Y4B-bYVhjd?)8#wVwd!x>X99*)sjD>Foyi*a8KlZ zW=^Md7b?M1>;)hKWCPr6iZzdwhjd#5HdxlI$it=%fdyHQjYHu(jU&5RxdGT}&0sUp zJ;aqAVJ%=vPMX^SBBybyahgc{i^&`8pB5o5PD5NSyzSE}(gviBNn4(_IZaJ#5dp*M zbMLumA>3XQWh@d4tMG=vI;ed3N5OTRYoX&J>nw7&g+&g-h)gG7(EQc_1rge1eka}; z|0%w0`of1-ja%@@>^4uA_n&e9j=pT@5Am@IP(Wheo>_3a*n5 zX$W3*r5JMMGP#HRsq>WdH##a*37bus zH1*gyo=ymR_K+N#^tib?ilNgZhD{T3Evsp6l-i!pP|B3;KioQe&NeT%N&03p(a`vLdW z+}t*#==L6+gLF%sZs^f5UJ}jMtvvFttvuirqzSKpf=q^-4Rst|K#wYKz&$K%1VyLm z!Q`-BH;XF2j%eitYtXo{ngE8O7)>U4H9&-4)&^0Z8gf;j2xHp@*$j#9l7SWym@Z71E=&`! zdWBg*Mj}R8QsLjPnx9kfl>w%6(cn}EsVSmVj4aJ^}uYeQku+E4*Z&+}a>bu^B>z2MLa9xL@ zrm$)wlMS}7SwsBe-?NR6{$9-ay_8!2jqGMm133h`h)HW zxl!of(w39KXX6ZRjmgN^oFQf$5C;ndT++i|uq{QO=!O^W=0S2F6GOCdCMF>i1BR#o zEgYFtVSXT)@69anaTMgl%xushgJJY8M=D6d%#`G9Pi$j_+g88%$n1j)X7&E;K*tF`jr_}?BIwm+y znyOAWjdM)*sRN}k${5o?#~7c|!CB^Rhiv7&oITt>=6Ua@qVjV0rjcbCRxMLYWW zkvHFcbp(?4o`0FakaZtM{0VeB1I*G3-5cv^WnV_)R$$S3lr8OFRw zyU2}^`H{7<0{m7im>g0l)8KKtZDBl0AjifX1#EoV8y22=PCFEIK1bm^j6doS6yPCA z6R(XWU8&lfL`WZf-;oH3vYHb%QovvsdM3ToWCFi_Xj@^Kh#B@!cZB(58?Hd0ahvQW zjixyX!Q_a;YvOZ?w?wXWaJL?Hl~g!EN*&0B@SMTcLS|d(t(LQ99sP0D;Y~|=eEHk1 zLy9^Uj=gQbvM)bn!&%N^Idot4<)?42Ey=xpZl4W%>`z)NJC%1xi`KvJcZAu3vU~-L zInpEiL+j8@>IV0fO2dP$``l`VZ=gKVaH~AkFjrn|IK(j!8hn;nzCAt>x~CFySj>Wo zeOx|20sD^@HWu#ZVo4>cVD7n~`H3>vR0m}&&_FRYVbh&YP?t163Fi#UEXy9tAxncr zvGn&bF;@UkQ6s()0L!SV1ile|z+w%Td$@*)6BAtOVa*VXyiHmp6a#F?LWb}^u0T>u zD|8q_MVZa*XmBBJ1jYeP*VkYc5w2HcYV*oO_L%sd@x0hzl#9g*BaHoKi%&_lw71-0RG*f9Yuu|GupAZNR^B%L z!Sb>3lJtwoVz4-De%Kv5DD5n&!hmOEBv%iMU@&88sp8EjAZ7;PtR`AFT5S5%h8zXh z{gzG8$1l~AW^t~b$L&r#Ih+`^(?KLH2$NF-wqXh{}_{JrodtuQ7TjKblmPxGW@Dgu1GKftb zT3Zf@chb=S^}c^tJcueoP&T;dNQg^Kgd)dy>@^BHU^ohZE+gEK5EDX?(6P{|kR0lt z9tqD3S7R^xR9G2^!WL|5|1GS*-V0wkoi&?yw(wV`-wI+{5(ac0t<1?TLiu#B6LmBv zo>if>RJN$L_@VW6s~#+??f2cE>el>WQ)|%n!nOw|8KkG`ha1N2*dbbDSB|}NzahSY zEtk99`vd~Im~eB$UlnK>LJk|E^%C-0gd=$!!ac)UUZpb1U6o#$FGLuc<*W&8$f?OYpO+%!VN*eM^$e*!BT%s|mWYr7;)kGRrV#!8$nb|Xz2Nr;VKb4k z!z4zh-2MCnCFu0~6J6&>MQ41zi6Fc=CD?~{-* zJWjr>R4M^H5A0p}D&dJbe}3WK>RYv0xwW~owQ<$kcR%|1v!`zvce?g}phEBas^-M_ zJD0I`@uQ2k-1^a;uUN16w@4%MOZl40A%K7=qOz7{*R`#6 zVHMYkVq*o9$^Z8}=S~uo{qOsI-}@mB=iZq+bMJZ1dCKo;s)2k!RbrbrE>pVUyoaHi z%8_thiDT0BjAJS{B25@|zD&O=W>HC|l!97pzx*@$q`+<_{ zjP!OEX24~1mHahAZso7tz^JJkg=+IZK%e53$#`uA7Ao*yu?P3Wl_av2ZqLu~VD>IE zRt@hmq1|F4uxihG+;_gDGS+*)j*o43zyc`}3A5)p`i1&6x>o8)deFhx-{L#oV=Iow zQ)&^so+$^_e)1ph^F=fic38AN6{!HfeFcFW%y_@yLPZ3PFb}}s1J)cc_`+o1$=R4r zWeN)5dS3*MUdD|;(;`5NRjxPrt8hm8n=m-U-^pjlh{S+GN_apY=E2t+Sx-9^4hUu) z^a3#l-I)*N+N)vrY%>ReQZ@~iBP~c2noERYcGQ}SpgD{kFJV`(zY=xC`@^A+zY7+? zwudv1DrrJ@qa~VGI4D64PS1Y&&ACMT)=v>=_s?lpq)#~cw!79pdEdWwKU_r5}y5hw_ z&49~Ppn^xVB4V$IpnfvNY&Z%s2q-M*tyVxCJO~IAd;^jal%YzO$<$f-z&=FFw}6%J zwI)B0mayd_6H)fENJJJT(jh~2nQ&Y!$tYnNq7%2PC8ere8`r+u-S)hgIBx6Zxm50f z+-mo2V!&H_MP2`=PRTv{=C>YgW@vndIVn0!r%e_ZChN~N) zwzLgdXLS72_=*4T{vgzA)PBj{+1}WTs^abV*wF*KDFtDWSU1EGeM0!mU}Q>26jxNZ zy~QcD5HSRC3jHb$IUykmnbjAT(+())SYMbnm`?~!tE?B&i?Pme{D~R%fjbv64m=-p zC@d(cU>$~JH3pV`4y4@{U@fM$%9kl1jGbk40b}e)E*fM8qsK=)RByZ}Ah2nyu04ppmQ|yzU4?GFBpJxZQKH`|CVumUvFNNcr0?(6T0Kj z=RGc(JboS+rAjds_7_2uh>{k^nl;r9PdY4$(`k1ks78x@Zbt-kfVUiqV3fn+qOiy@ zW_%ol&wvCI%Zaj7#PJ7aDM`sF!&uR%zo5bj(gu0fiwRXg zhEz8~l>~hmKYPbX`(l2`17fWh$kH~}0J<3QOj;Ef{J{1a25lQzO=|&!pPy=0_Ve04 z%V_Hzm;scYz;W965cDZ9!eXt+QGuGq*}{srv!K5y((sbB$NDQNqu64Hbp221hiYx0 zVz`rVW%?j7^lzUox$VX6?SF0m5_O?NJH&avo!4^mL+^Y+1RMI7#Y^5gR=u}<5*?{? 
[GIT binary patch data and unrecoverable diff text omitted]
-SHELL ["/bin/bash", "--login", "-c"]
-
-# Install PanGenie
-# Without setting `PKG_CONFIG_PATH`, cmake/make cannot resolve
-# jellyfish dependency.
-# The jellyfish version is pinned to its current latest release.
-RUN conda install -y jellyfish=2.2.10 -c bioconda && \
-    export PKG_CONFIG_PATH="/opt/conda/envs/pangenie/lib/pkgconfig/" && \
-    cd pangenie && \
-    mkdir build && \
-    cd build && \
-    cmake .. && \
-    make
-ENV PATH="/pangenie/build/src/:$PATH"
-
-RUN apt-get -qqy clean && \
-    rm -rf /tmp/* \
-        /var/tmp/* \
-        /var/cache/apt/* \
-        /var/lib/apt/lists/* \
-        /usr/share/man/?? \
-        /usr/share/man/??_*
diff --git a/dockerfiles/rdpesr/Dockerfile b/dockerfiles/rdpesr/Dockerfile
deleted file mode 100644
index 97be38a6f..000000000
--- a/dockerfiles/rdpesr/Dockerfile
+++ /dev/null
@@ -1,89 +0,0 @@
-# This is the barebones image for the GATK SV pipeline that includes only
-# some basic bioinformatics utilities.
-
-# Start with linux base
-FROM ubuntu:18.04
-
-# Set environment variables.
-ENV HOME=/root
-ENV R_HOME=/usr
-
-# Define default command.
-CMD ["bash"]
-
-#### Basic image utilities
-
-ENV DEBIAN_FRONTEND noninteractive
-RUN apt-get update && apt-get install -y --no-install-recommends apt-utils
-RUN apt-get update && apt-get install -y r-base
-RUN apt-get update --fix-missing && \
-    apt-get upgrade -y && \
-    apt-get install -y \
-        bcftools \
-        bedtools \
-        openjdk-8-jdk \
-        xvfb \
-        zip \
-        python3-pip \
-        python3-dev \
-        curl \
-        git \
-        samtools \
-        tabix \
-        vcftools \
-        wget \
-        zlib1g-dev && \
-    rm -rf /var/lib/apt/lists/* && \
-    apt-get -y clean && \
-    apt-get -y autoremove && \
-    apt-get -y autoclean
-
-RUN pip3 install --no-cache-dir numpy && \
-    pip3 install pysam
-
-
-
-# R packages
-ARG DEBIAN_FRONTEND=noninteractive
-ARG MNORMT_URL="https://cran.r-project.org/src/contrib/Archive/mnormt/mnormt_1.5-7.tar.gz"
-ARG MNORMT_DEST="/tmp/mnormt.tar.gz"
-ARG DAAG_URL="https://cran.r-project.org/src/contrib/Archive/DAAG/DAAG_1.22.tar.gz"
-ARG DAAG_DEST="/tmp/DAAG.tar.gz"
-ARG SV_PIPELINE_BIOC_PKGS="\"multtest\""
-ARG SV_PIPELINE_R_PKGS="BSDA caret data.table fpc hash metap perm plyr pwr reshape ROCR zoo"
-ARG SLIM_R_LIB_CMD="find . -type d \\( -name \"help\" -o -name \"doc\" -o -name \"html\" -o -name \"htmlwidgets\" -o -name \"demo\" -o -name \"demodata\" -o -name \"examples\" -o -name \"exampleData\" -o -name \"unitTests\" -o -name \"tests\" -o -name \"testdata\" -o -name \"shiny\" \\) | xargs rm -rf"
-RUN apt-get -qqy update --fix-missing && \
-    apt-get -qqy dist-upgrade && \
-    apt-get -qqy install --no-install-recommends \
-        make cmake automake \
-        libssh2-1-dev \
-        libssl-dev && \
-    Rscript -e "install.packages(c('BiocManager', 'latticeExtra','optparse'), repos = 'https://cran.rstudio.com', quiet = TRUE)" && \
-    Rscript -e "BiocManager::install(${SV_PIPELINE_BIOC_PKGS})" && \
-    curl "${MNORMT_URL}" --output "${MNORMT_DEST}" && \
-    curl "${DAAG_URL}" --output "${DAAG_DEST}" && \
-    Rscript -e "install.packages(c('${MNORMT_DEST}','${DAAG_DEST}'), repos = NULL, quiet = TRUE)" && \
-    mkdir -p /tmp/R_pkg_download/ && \
-    cd /opt/ && \
-    cd "/usr/local/lib/R/site-library" && eval ${SLIM_R_LIB_CMD} && \
-    cd "/usr/lib/R/site-library" && eval ${SLIM_R_LIB_CMD} && \
-    apt-get -qqy purge make cmake automake && \
-    apt-get -qqy clean && \
-    rm -rf /tmp/* \
-        /var/tmp/* \
-        /var/cache/apt/* \
-        /var/lib/apt/lists/* \
-        /usr/share/man/?? \
-        /usr/share/man/??_*
-
-VOLUME ["/root/.config"]
-
-COPY modify_bed_for_PE_SR_RD_labeling.R /src/modify_bed_for_PE_SR_RD_labeling.R
-COPY add_SR_PE_to_PB_INS.V2.py /src/add_SR_PE_to_PB_INS.V2.py
-COPY add_RD_to_SVs.py /src/add_RD_to_SVs.py
-COPY bincov_to_normCov.R /src/bincov_to_normCov.R
-COPY add_GC_anno_to_bed.R /src/add_GC_anno_to_bed.R
-COPY Modify_vcf_by_steps.py /src/Modify_vcf_by_steps.py
-COPY integrate_annotations.R /src/integrate_annotations.R
-
-
diff --git a/dockerfiles/rdpesr/Modify_vcf_by_steps.py b/dockerfiles/rdpesr/Modify_vcf_by_steps.py
deleted file mode 100755
index 99c939a79..000000000
--- a/dockerfiles/rdpesr/Modify_vcf_by_steps.py
+++ /dev/null
@@ -1,52 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-#
-
-"""
-"""
-
-import pysam
-import argparse
-
-
-def modify_vcf(vcf_in_file, vcf_out_file, step_size, contig):
-    vcf_in = pysam.VariantFile(vcf_in_file)
-    vcf_out = pysam.VariantFile(vcf_out_file, 'w', header=vcf_in.header)
-    for rec in vcf_in:
-        rec.pos += step_size
-        rec.stop += step_size
-        if rec.pos > 0 and not rec.stop > contig[rec.contig]:
-            vcf_out.write(rec)
-    vcf_out.close()
-    vcf_in.close()
-
-
-def contig_readin(contig):
-    out = {}
-    fin = open(contig)
-    for line in fin:
-        pin = line.strip().split()
-        out[pin[0]] = int(pin[1])
-    fin.close()
-    return out
-
-
-def main():
-    parser = argparse.ArgumentParser(
-        description='Shift each variants in vcf by a fixed step.')
-    parser.add_argument('vcf_in', metavar='', type=str,
-                        help='name of vcf file to be modified')
-    parser.add_argument('vcf_out', metavar='', type=str,
-                        help='name of output vcf file')
-    parser.add_argument('-s', '--step_size', type=int,
-                        help='size of step to be shifted.')
-    parser.add_argument('-c', '--contig', type=str,
-                        help='contig files, or reference index.')
-
-    args = parser.parse_args()
-    contig = contig_readin(args.contig)
-    modify_vcf(args.vcf_in, args.vcf_out, args.step_size, contig)
-
-
-if __name__ == "__main__":
-    main()
diff --git a/dockerfiles/rdpesr/RdTestV2.R b/dockerfiles/rdpesr/RdTestV2.R
deleted file mode 100755
index 595826b3b..000000000
--- a/dockerfiles/rdpesr/RdTestV2.R
+++ /dev/null
@@ -1,1477 +0,0 @@
-#!/usr/bin/env Rscript
-
-#---------------------------
-# Batch CNV Interval Genotyping
-# Talkowski Laboratory
-#
-# Harrison Brand, Ryan Collins, and Joseph Glessner
-# Update May 2017 (implementation for SV-pipeline)
-# Update June 2016 (clean modules, smaller windows, multiallelic binning, denovo visual)
-# Update October 2015 (load sample set prior)
-# Update August 2015 (for incorporation into Holmes liWGS-SV 1.0)
-#---------------------------
-
-#Loads required packages; installs if necessary
-RPackages <- c("optparse", "plyr", "MASS", "zoo","methods","metap", "e1071", "fpc", "BSDA", "DAAG", "pwr", "reshape", "perm", "hash")
-for (i in RPackages)
-{
-  if (i %in% rownames(installed.packages()) == FALSE) {
-    response <- readline("Install Required package (Y/N):")
-    if (response == "Y") {
-      install.packages((i), repos = "http://cran.rstudio.com")
-      library(i, character.only = TRUE)
-    } else {
-      stop (paste("Unable to run script without package: ", i, sep = ""))
-    }
-  } else {
-    library(i, character.only = TRUE)
-  }
-}
-
-##build a list of command line options##
-list <- structure(NA, class = "result")
-"[<-.result" <- function(x, ..., value) {
-  args <- as.list(match.call())
-  args <- args[-c(1:2, length(args))]
-  length(value) <- length(args)
-  for (i in seq(along = args)) {
-    a <- args[[i]]
-    if (!missing(a))
-      eval.parent(substitute(a <- v, list(a = a, v = value[[i]])))
-  }
-  x
-}
-
-#Command line options
-
-option_list = list(
-  make_option(c("-b", "--bed"), type="character", default=NULL,
-              help="Bed file of CNVs to check. No header. Locus ID as fourth column. SampleIDs of interest comma delimited as fifth column. CNVtype (DEL,DUP) as the sixth column", metavar="character"),
-  make_option(c("-c", "--coveragefile"), type="character", default=NULL,
-              help="Full path to 1kb or 100bp binned coverage matrix for entire cohort", metavar="character"),
-  make_option(c("-x", "--coveragepath"), type = 'character', default = NULL,
-              help = "Folder including all 1kb or 100bp binned coverage matrix for entire cohort", metavar = "character"),
-  make_option(c("-m", "--medianfile"), type="character", default=NULL,
-              help="Full path to median intensity file with values for entire cohort", metavar="character"),
-  make_option(c("-f", "--famfile"), type="character", default=NULL,
-              help="Fam file FamID IndividualID(InCNVCallFile) FatherID MotherID Gender(1=male,2=female) Affected(1=unaffected,2=affected,-9=exclude)", metavar="character"),
-  make_option(c("-o", "--outFolder"), type="character", default="./",
-              help="Optional:Output folder", metavar="character"),
-  make_option(c("-n", "--outputname"), type="character", default="out",
-              help="Optional: Output file name for genotyping matrix.", metavar="character"),
-  make_option(c("-r", "--refgeno"), type="character", default=NULL,
-              help="Optional: File with precomputed genotype cutoffs; Requires -g TRUE", metavar="character"),
-  make_option(c("-y", "--poorbincov"), type="character", default=NULL,
-              help="Optional: Remove poor bin cov regions from cov matrix File; Requires no compression of bins which can be achieved by high -i (i.e -i 1000000) ", metavar="character"),
-  make_option(c("-v", "--geno_adjust"), type="logical", default=FALSE,
-              help="Optional:Ajust median CNV call to better match predicted CNV median from genotype cutoffs ; Requires -g TRUE and -r TRUE. Default:FALSE", metavar="logical"),
-  make_option(c("-g", "--rungenotype"), type="logical", default=FALSE,
-              help="Optional:Peform genotyping on the cohort Default:FALSE; Requires -r TRUE ", metavar="logical"),
-  make_option(c("-d", "--denovo"), type="logical", default=FALSE,
-              help="Optional:Call de novo per family (must only be single sample) Default:FALSE", metavar="logical"),
-  make_option(c("-i", "--bins"), type="numeric", default=10,
-              help="Optional:Number of bins", metavar="numeric"),
-  make_option(c("-p", "--plot"), type="logical", default=FALSE,
-              help="Optional:Plot JPG visualizations of CNV. Default:FALSE", metavar="logical"),
-  make_option(c("-a", "--plotfamily"), type="logical", default=FALSE,
-              help="Optional:Plot family based JPG visualizations; Requires -d TRUE. Default:FALSE", metavar="logical"),
-  make_option(c("-j", "--runKmeans"), type="logical", default=FALSE,
-              help="Optional: Run Kmeans", metavar="logical"),
-  make_option(c("-e", "--Kintervalstart"), type="numeric", default=0.1,
-              help="Optional:Lowest intesity diffrence between centers you want to test in kmeans Default:0.1", metavar="numeric"),
-  make_option(c("-q", "--Kintervalend"), type="numeric", default=1,
-              help="Optional:Highest intesity diffrence between centers you want to test in kmeans Default:1", metavar="numeric"),
-  make_option(c("-t", "--Kinterval"), type="numeric", default=0.1,
-              help="Optional:Intervals of intesity to test between interval start and end for example (start=0.1, end=0.5, interval=0.1) will test centers seperated by each of the following (0.1,0.2,0.3,0.4,0.5) default=0.5", metavar="numeric"),
-  make_option(c("-k", "--plotK"), type="logical", default=FALSE,
-              help="Optional:Plot JPG visualization of copy state (requires -j TRUE if want to plot kmeans) . Default:FALSE", metavar="logical"),
-  make_option(c("-s", "--sizefilter"), type="numeric", default=1000000,
-              help="Optional:Restrict to large CNV to inner specified size Default:1000000", metavar="numeric"),
-  make_option(c("-u", "--quartetDenovo"), type="logical", default=FALSE,
-              help="Proband,Father,Mother, & Sib de novo analysis", metavar="logical"),
-  make_option(c("-z", "--mosaicsep"), type="logical", default=FALSE,
-              help="Optional:Change sep calculation to a maxium rather than medium for determing mosaic variants", metavar="logical"),
-  make_option(c("-l", "--Blacklist"), type="character", default=NULL,
-              help="Optional:Single column file with blacklist of samples to remove", metavar="character"),
-  make_option(c("-w", "--Whitelist"), type="character", default=NULL,
-              help="Optional:Single column file with whitelist of samples to include", metavar="character")
-);
-
-opt_parser = OptionParser(option_list = option_list)
-opt = parse_args(opt_parser)
-
-##QC check, see file inputs exist and are formated correctly and edit if neccessary##
-
-##If bed file is blank just output header##
-bedlinecount=tryCatch(read.table(opt$bed), error=function(e) NULL)
-if ( is.null(bedlinecount)) {
-  if (opt$denovo==FALSE) {
-    if(!file.exists(paste(opt$outFolder,opt$outputname,".metrics",sep=""))) {
-      ##write header##
-      write.table(matrix(c("chr","Start","End","CNVID","SampleIDs","Type","Median_Power","P","2ndMaxP","Model","Median_Rank","Median_Separation"),nrow=1),paste(opt$outFolder, opt$outputname, ".metrics", sep = ""),quote=FALSE,row.names=FALSE,col.names=FALSE,sep= "\t")
-    }
-  } else {
-    if(!file.exists(paste(opt$outFolder, opt$outputname,".denovo",sep=""))) {
-      ##write header for de novo##
-      if (opt$quartetDenovo==TRUE) {
-        write.table(matrix(c("chr","Start","End","CNVID","Type","Family","AffectedMember","Pro.P","Sib.P","Fa.P","Mo.P","Pro.secMaxP","Sib.secMaxP","Fa.secMaxP","Mo.secMaxP","Pro.Sep","Sib.Sep","Fa.Sep","Mo.Sep","Pro.rank","Sib.rank","Fa.rank","Mo.rank"),nrow=1),paste(opt$outFolder, opt$outputname, ".denovo", sep = ""),quote=FALSE,row.names=FALSE,col.names=FALSE,sep= "\t")
-      } else { write.table(matrix(c("chr","Start","End","CNVID","SampleIDs","Type","Median_Power","P","2ndMaxP","Model","Median_Rank","Median_Separation"),nrow=1),paste(opt$outFolder, opt$outputname, ".denovo", sep = ""),quote=FALSE,row.names=FALSE,col.names=FALSE,sep= "\t") }
-    }
-  }
-  quit()
-}
-
-##make sure bed and file coverage exist##
-if (is.null(opt$bed) || is.null(opt$coveragepath)){
-  print_help(opt_parser)
-  stop("At least two
arguments must be supplied (input bed and coverage file).", - call. = FALSE) -} -#if (is.null(opt$bed) || is.null(opt$coveragefile)) { -# print_help(opt_parser) -# stop("At least two arguments must be supplied (input bed and coverage file).", -# call. = FALSE) -#} - -##Add slash to output folder if necessary## -if (substr(opt$outFolder, nchar(opt$outFolder), nchar(opt$outFolder) + 1) != - "/") { - opt$outFolder = paste(opt$outFolder, "/", sep = "") -} - -##check bed## -#Loads regions: chr start end locusID sampleID1,sampleID2,... -intervals <- read.table(opt$bed, sep = "\t", header = F) - -#Make start and end numeric -intervals[, c(2:3)] <- - apply(intervals[, c(2:3)], 2, function(vals) { - return(as.numeric(as.character(vals))) - }) - -##CNVtype check## -if (length(grep("del",intervals[,6],ignore.case = TRUE))+ - length(grep("dup",intervals[,6],ignore.case = TRUE))0) { - stop("INPUT ERROR: Improper input coordinates. End must be greater than start.") -} - -#Make sure cov bed is linked to gzipped file and tabix ready## -for(i in list.files(opt$coveragepath)){ - if (length(grep ("gz$", i))<1 & length(grep ("tbi$", i))<1){ - stop("Error cov file is not gzipped") - } -} - -#if (length(grep ("gz$",opt$coveragefile))<1) -#{ -# stop("Error cov file is not gzipped") -#} - -if (length(paste(opt$coveragefile,".tbi",sep=""))<1) -{ - stop("Error cov file is missing tabix index") -} - - -#Ensure Standard Decimal Notation -options(scipen = 1000) - -##RdTest functions - -#Rebinning helper function (df=dataframe,compression amount) -rebin <- function(df, compression) { - Chr <- df[1, 1] - Start <- df[1, 2] - End <- df[compression, 3] - for (i in 2:(floor(nrow(df) / compression))) { - Chr <- c(Chr, as.character(df[((i - 1) * compression) + 1, 1])) - Start <- c(Start, as.integer(df[((i - 1) * compression) + 1, 2])) - End <- c(End, as.integer(df[i * compression, 3])) - } - newvals <- apply(df[, 4:ncol(df)], 2, - function(vals, compression) { - newcol <- sum(vals[1:compression]) - for (i in 2:(floor(length(vals) / compression))) { - newcol <- - c(newcol, as.integer(sum(vals[(((i - 1) * compression) + 1):(i * compression)]))) - } - return(newcol) - }, compression) - return(as.data.frame(cbind(Chr, Start, End, newvals))) -} - - -#Reads coverage of specific queried region and compresses to a reasonable number of bins to create a region coverage matrix -#sampleIDs is comma specficed list of samples## - -loadData_old <- function(chr, start, end, cnvID, sampleIDs,coveragefile,medianfile,bins,poorbincov=NULL) - { - #Take the coverage matrix header and tabix query the region in the .gz coverage matrix - cov1 <-read.table(pipe(paste("tabix -h ",coveragefile," ", chr, ":", start, "-", end, " | sed 's/^#//'|sed 's/Start/start/g'|sed 's/Chr/chr/g'|sed 's/End/end/g'", sep = "")),sep = "\t", header = TRUE, check.names = FALSE) - #Load plotting values if median coverage file generated by bincov## - allnorm <- read.table(medianfile, header = TRUE, check.names = FALSE) - ##remove when start or end pull in extra tabix line## - cov1<-cov1[cov1$start!=end,] - cov1<-cov1[cov1$end!=start,] - #Check if no data - if (nrow(cov1) < 1) { - return("Failure") - } - #Find window bin size - BinSize <- cov1$end[1] - cov1$start[1] - - ##Find variants with with some missing bins because bincov blacklist## - if (nrow(cov1) < ((end - start) / BinSize)) { - Rfinal = round_any(end, BinSize, floor) - Rbeg = round_any(start, BinSize, ceiling) - column_start = matrix(seq(Rbeg, Rfinal, by = BinSize), ncol = 1) - column_end = matrix(seq(Rbeg + 
BinSize, Rfinal + BinSize, by = BinSize), ncol = 1) - ncov_col = ncol(cov1) - null_model <- - cbind(chr, column_start, column_end, matrix(rep(0, times = nrow(column_start) * - (ncov_col - 3)), ncol = ncov_col - 3)) - colnames(null_model) <- colnames(cov1) - covall <- rbind(cov1, null_model) - cov1 <- covall[!duplicated(covall[, 2]), ] - cov1 <- cov1[order(cov1[, 2]), ] - ##Use sapply to convert files to numeric only more than one column in cov1 matrix. If not matrix will already be numeric## - if (nrow(cov1) > 1) { - cov1 <- data.frame(sapply(cov1, as.numeric), check.names = FALSE) - } else {cov1<-data.frame(t(sapply(cov1,as.numeric)),check.names=FALSE)} - } - - - #Round down the number of used bins events for smaller events (e.g at 100 bp bins can't have 10 bins if event is less than 1kb) - if ((round_any(end, BinSize, floor) - round_any(start, BinSize, ceiling)) < bins * BinSize) - { - bins = (round_any(end, BinSize, floor) - round_any(start, BinSize, ceiling)) / - BinSize - if (bins <= 1) - { - Rstart <- round_any(start, BinSize, floor) - Rend <- round_any(end, BinSize, ceiling) - compression = 1 - } - } - - #Round bins to ensure even compression (10 bins at 100bp should have a Rstart-Rend of 1kb)## - if (!exists("compression")) - { - UnadjustedBins <- - (round_any(end, BinSize, floor) - round_any(start, BinSize, ceiling)) / - (bins * BinSize) - RemainderForRemoval <- - ##Need to account for round error by trunc so add the decimal#### - trunc((( - UnadjustedBins - trunc(UnadjustedBins) - ) * BinSize * bins / 2) + 0.000000001) - RemainderFront <- - round_any(RemainderForRemoval, BinSize, floor) - RemainderBack <- - round_any(RemainderForRemoval, BinSize, ceiling) - Rstart <- - round_any(start, BinSize, ceiling) + RemainderFront - Rend <- - round_any(end, BinSize, floor) - RemainderBack - compression <- (Rend - Rstart) / (BinSize * bins) - } - #Cut bins down to those required for compressed clean size based on Rstart and Rend## - cov1<-cov1[which(cov1[,3]>Rstart & cov1[,2] 0) - { - cat ( - " WARNING: IDs in samplesBlacklist but not coverage:", - IDsSamplesBlacklistButNotCoverage - ) - } - ##Filter samples based of specified blacklist here## - cov1 <- cov1[,!(names(cov1) %in% samplesBlacklist)] - allnorm <- - allnorm[,!(names(allnorm) %in% samplesBlacklist)] - } - - ##Allow whitelist## - if (!is.null(opt$Whitelist)) { - samplesWhitelist <- readLines(opt$Whitelist) - IDsSamplesWhitelistButNotCoverage <- - samplesWhitelist[!(samplesWhitelist %in% names(cov1))] - if (length(IDsSamplesWhitelistButNotCoverage) > 0) - { - cat ( - " WARNING: IDs in samplesWhitelist but not coverage:", - IDsSamplesWhitelistButNotCoverage - ) - } - ##make sure to still include first three lines### - cov1 <- cov1[,names(cov1) %in% c("chr","start","end",samplesWhitelist)] - allnorm <- - allnorm[, (names(allnorm) %in% samplesWhitelist)] - } - if (ncol(cov1) < 4) - { - stop (" WARNING: All samples excluded by filtering") - } - #Approximates rebinned per-sample medians (approximated for speed & memory) - allnorm[which(allnorm == 0)] <- 1 - allnorm <- compression * allnorm - - #Replace zero values with 1 for handling normalization - cov1[cov1 == 0] <- 1 - - ##restrict bins to those with unique mapping## - if (!is.null(poorbincov)) { - intervalfile=poorbincov - ##take off 10% on each of cnv for more accurate check for depth## - start10<-round(start+((end-start)*0.10)) - end10<-round(end-((end-start)*0.10)) - cov_exclude<-cov1[which(cov1[,3]<=start10 | cov1[,2]>=end10),] - - ##pull outcoord that fail## - 
file.length<-tryCatch(read.table(pipe(paste("tabix -h ",intervalfile ," ", chr, ":", start10, "-", end10, sep = "")),sep = "\t"),error=function(e) NULL) - - passing_int<-c(paste(cov_exclude[,1],"_",cov_exclude[,2],"_",cov_exclude[,3],sep=""),as.character(file.length[,4])) - - #don't include poor region filter but still shave 10%### - passing_int_noregion<-c(paste(cov_exclude[,1],"_",cov_exclude[,2],"_",cov_exclude[,3],sep="")) - - ##remove failing bins from coverage file## - cov2<-cov1[-which(paste(cov1[,1],"_",cov1[,2],"_",cov1[,3],sep="") %in% passing_int),] - cov3<-cov1[-which(paste(cov1[,1],"_",cov1[,2],"_",cov1[,3],sep="") %in% passing_int_noregion),] - ##must have at least 10 bins after filtering or exclude## - if (nrow(cov2) >9) { - cov1<-cov2 - } else if (nrow(cov3) >9) { - cov1<-cov3 - } - - } - - #Rebins values - if (compression > 1) { - res <- - rebin(cov1, compression) - res <- - apply(res[, 4:ncol(res)], 2, function(val) { - as.numeric(as.matrix(val)) - }) - } else { - res <- cov1[, 4:ncol(cov1)] - } - - #Adds sample medians to df - res0<-rbind((res), allnorm) - - #Scale each col within that sample - res1<- apply(res0,2, - function(vals){ - return(as.numeric(vals[1:(nrow(res0)-1)])/as.numeric(vals[nrow(res0)])) - }) - - #need to transpose if more than one bin assessed - if (ncol(as.matrix(res1)) > 1) { - cnv_matrix <- t(res1) - } else { - cnv_matrix <- as.matrix(res1) - } - return(cnv_matrix) - } - -loadData <- function(chr, start, end, cnvID, sampleIDs,coveragepath,medianfile,bins,poorbincov=NULL) - { - #Take the coverage matrix header and tabix query the region in the .gz coverage matrix - coveragefile_list = c() - for(i in list.files(coveragepath)){ - if(!grepl('.tbi',i) & grepl('gz',i)){ - coveragefile = file.path(coveragepath,i) - coveragefile_list = c(coveragefile_list, coveragefile) - } - } - coveragefile = coveragefile_list[1] - cov1 <-read.table(pipe(paste("tabix -h ",coveragefile," ", chr, ":", start, "-", end, " | sed 's/^#//'|sed 's/Start/start/g'|sed 's/Chr/chr/g'|sed 's/End/end/g'", sep = "")),sep = "\t", header = TRUE, check.names = FALSE) - if(length(coveragefile_list)>1){ - for(coveragefile in coveragefile_list){ - if(coveragefile!=coveragefile_list[1]){ - cov2 = read.table(pipe(paste("tabix -h ",coveragefile," ", chr, ":", start, "-", end, " | sed 's/^#//'|sed 's/Start/start/g'|sed 's/Chr/chr/g'|sed 's/End/end/g'", sep = "")),sep = "\t", header = TRUE, check.names = FALSE) - cov1 = cbind(cov1, cov2[,c(4:ncol(cov2))]) - } - } - } - - #cov1 <-read.table(pipe(paste("tabix -h ",coveragefile," ", chr, ":", start, "-", end, " | sed 's/^#//'|sed 's/Start/start/g'|sed 's/Chr/chr/g'|sed 's/End/end/g'", sep = "")),sep = "\t", header = TRUE, check.names = FALSE) - #Load plotting values if median coverage file generated by bincov## - allnorm <- read.table(medianfile, header = TRUE, check.names = FALSE) - ##remove when start or end pull in extra tabix line## - cov1<-cov1[cov1$start!=end,] - cov1<-cov1[cov1$end!=start,] - #Check if no data - if (nrow(cov1) < 1) { - return("Failure") - } - #Find window bin size - BinSize <- cov1$end[1] - cov1$start[1] - - ##Find variants with with some missing bins because bincov blacklist## - if (nrow(cov1) < ((end - start) / BinSize)) { - Rfinal = round_any(end, BinSize, floor) - Rbeg = round_any(start, BinSize, ceiling) - column_start = matrix(seq(Rbeg, Rfinal, by = BinSize), ncol = 1) - column_end = matrix(seq(Rbeg + BinSize, Rfinal + BinSize, by = BinSize), ncol = 1) - ncov_col = ncol(cov1) - null_model <- - cbind(chr, column_start, 
column_end, matrix(rep(0, times = nrow(column_start) * - (ncov_col - 3)), ncol = ncov_col - 3)) - colnames(null_model) <- colnames(cov1) - covall <- rbind(cov1, null_model) - cov1 <- covall[!duplicated(covall[, 2]), ] - cov1 <- cov1[order(cov1[, 2]), ] - ##Use sapply to convert files to numeric only more than one column in cov1 matrix. If not matrix will already be numeric## - if (nrow(cov1) > 1) { - cov1 <- data.frame(sapply(cov1, as.numeric), check.names = FALSE) - } else {cov1<-data.frame(t(sapply(cov1,as.numeric)),check.names=FALSE)} - } - - - #Round down the number of used bins events for smaller events (e.g at 100 bp bins can't have 10 bins if event is less than 1kb) - if ((round_any(end, BinSize, floor) - round_any(start, BinSize, ceiling)) < bins * BinSize) - { - bins = (round_any(end, BinSize, floor) - round_any(start, BinSize, ceiling)) / - BinSize - if (bins <= 1) - { - Rstart <- round_any(start, BinSize, floor) - Rend <- round_any(end, BinSize, ceiling) - compression = 1 - } - } - - #Round bins to ensure even compression (10 bins at 100bp should have a Rstart-Rend of 1kb)## - if (!exists("compression")) - { - UnadjustedBins <- - (round_any(end, BinSize, floor) - round_any(start, BinSize, ceiling)) / - (bins * BinSize) - RemainderForRemoval <- - ##Need to account for round error by trunc so add the decimal#### - trunc((( - UnadjustedBins - trunc(UnadjustedBins) - ) * BinSize * bins / 2) + 0.000000001) - RemainderFront <- - round_any(RemainderForRemoval, BinSize, floor) - RemainderBack <- - round_any(RemainderForRemoval, BinSize, ceiling) - Rstart <- - round_any(start, BinSize, ceiling) + RemainderFront - Rend <- - round_any(end, BinSize, floor) - RemainderBack - compression <- (Rend - Rstart) / (BinSize * bins) - } - #Cut bins down to those required for compressed clean size based on Rstart and Rend## - cov1<-cov1[which(cov1[,3]>Rstart & cov1[,2] 0) - { - cat ( - " WARNING: IDs in samplesBlacklist but not coverage:", - IDsSamplesBlacklistButNotCoverage - ) - } - ##Filter samples based of specified blacklist here## - cov1 <- cov1[,!(names(cov1) %in% samplesBlacklist)] - allnorm <- - allnorm[,!(names(allnorm) %in% samplesBlacklist)] - } - - ##Allow whitelist## - if (!is.null(opt$Whitelist)) { - samplesWhitelist <- readLines(opt$Whitelist) - IDsSamplesWhitelistButNotCoverage <- - samplesWhitelist[!(samplesWhitelist %in% names(cov1))] - if (length(IDsSamplesWhitelistButNotCoverage) > 0) - { - cat ( - " WARNING: IDs in samplesWhitelist but not coverage:", - IDsSamplesWhitelistButNotCoverage - ) - } - ##make sure to still include first three lines### - cov1 <- cov1[,names(cov1) %in% c("chr","start","end",samplesWhitelist)] - allnorm <- - allnorm[, (names(allnorm) %in% samplesWhitelist)] - } - if (ncol(cov1) < 4) - { - stop (" WARNING: All samples excluded by filtering") - } - #Approximates rebinned per-sample medians (approximated for speed & memory) - allnorm[which(allnorm == 0)] <- 1 - allnorm <- compression * allnorm - - #Replace zero values with 1 for handling normalization - cov1[cov1 == 0] <- 1 - - ##restrict bins to those with unique mapping## - if (!is.null(poorbincov)) { - intervalfile=poorbincov - ##take off 10% on each of cnv for more accurate check for depth## - start10<-round(start+((end-start)*0.10)) - end10<-round(end-((end-start)*0.10)) - cov_exclude<-cov1[which(cov1[,3]<=start10 | cov1[,2]>=end10),] - - ##pull outcoord that fail## - file.length<-tryCatch(read.table(pipe(paste("tabix -h ",intervalfile ," ", chr, ":", start10, "-", end10, sep = "")),sep = 
"\t"),error=function(e) NULL) - - passing_int<-c(paste(cov_exclude[,1],"_",cov_exclude[,2],"_",cov_exclude[,3],sep=""),as.character(file.length[,4])) - - #don't include poor region filter but still shave 10%### - passing_int_noregion<-c(paste(cov_exclude[,1],"_",cov_exclude[,2],"_",cov_exclude[,3],sep="")) - - ##remove failing bins from coverage file## - cov2<-cov1[-which(paste(cov1[,1],"_",cov1[,2],"_",cov1[,3],sep="") %in% passing_int),] - cov3<-cov1[-which(paste(cov1[,1],"_",cov1[,2],"_",cov1[,3],sep="") %in% passing_int_noregion),] - ##must have at least 10 bins after filtering or exclude## - if (nrow(cov2) >9) { - cov1<-cov2 - } else if (nrow(cov3) >9) { - cov1<-cov3 - } - - } - - #Rebins values - if (compression > 1) { - res <- - rebin(cov1, compression) - res <- - apply(res[, 4:ncol(res)], 2, function(val) { - as.numeric(as.matrix(val)) - }) - } else { - res <- cov1[, 4:ncol(cov1)] - } - - #Adds sample medians to df - res0<-rbind((res), allnorm[colnames(res)]) - - #Scale each col within that sample - res1<- apply(res0,2, - function(vals){ - return(as.numeric(vals[1:(nrow(res0)-1)])/as.numeric(vals[nrow(res0)])) - }) - - #need to transpose if more than one bin assessed - if (ncol(as.matrix(res1)) > 1) { - cnv_matrix <- t(res1) - } else { - cnv_matrix <- as.matrix(res1) - } - return(cnv_matrix) - } - -#Loads specified sample set in genotyping matrix based on the specified cnv type (del=1,dup=3) and unspecified samples as cn=2 -#sampleIDs is comma specficed list of samples## -specified_cnv <- function(cnv_matrix, sampleIDs, cnvID, chr, start, end, cnvtype) - { - CNV <- matrix(c(cnvID, chr, start, end), nrow = 1) - genotype_matrix <- cbind(CNV, t(matrix(seq(1, nrow(cnv_matrix))))) - colnames(genotype_matrix) <- c("ID", "Chr", "Start", "End", rownames(cnv_matrix)) - samplenames <- colnames(as.matrix(genotype_matrix)) - columnswithsamp <- which(colnames(genotype_matrix) %in% unlist(strsplit(as.character(sampleIDs),split=","))) - if (length(columnswithsamp)==0) { - ##"WARNING: No samples in coverage matrix for comparision check black/whitelist"## - return ("No_Samples") - } - - ##create genotype matrix## - if (toupper(cnvtype) == "DEL") - { - genotype_matrix[1, columnswithsamp] = 1 - ##make sure first four columns are not modified## - columnswithsamp <- c(columnswithsamp, 1, 2, 3, 4) - genotype_matrix[1,-columnswithsamp] = 2 - } else if (toupper(cnvtype) == "DUP") - { - genotype_matrix[1, columnswithsamp] = 3 - ##make sure first four columns are not modified## - columnswithsamp <- c(columnswithsamp, 1, 2, 3, 4) - genotype_matrix[1,-columnswithsamp] = 2 - } - return(genotype_matrix) - } - -###Kmeans multi-CNV Test## -##interval is measured by predicted copy state## -kMeans <-function(cnv_matrix,chr,start,end,cnvID,Kinterval,Kintervalstart,Kintervalend,outFolder,outputname) - { - samplenames <- rownames(cnv_matrix) - #create Eucledian matrix### - eucledianM <- dist(cnv_matrix, method = "euclidean") - # counts clusters for different attempts at starting points for k-means k values - ks = 0 - #avg. 
silhouette width to measure success of each run - avg.silwidth = 0 - count=0 - if (length(samplenames) > 100) { - totalcopystate <- 100 - } else{ - totalcopystate <- length(samplenames)-1 - } - for (i in seq(Kintervalstart, Kintervalend, Kinterval)) { - count=count+1 - ##Need to make sure there are not more copy states than samples (max at a copy state of 100)## - ##Run Kmeans## - k <-kmeans( - cnv_matrix, - ##center assignment## - matrix(rep(seq(0, i*totalcopystate , by = i),ncol(cnv_matrix)),ncol = ncol(cnv_matrix)), - iter.max = 100, - algorithm = "Forgy" - ) - # Number of clusters - ##Count number of centers with values## - a <- (count(k$centers > 0)) - ks[count] = a$freq[1] - ##Get Cluster Stats and average silwidth# - if (a$freq[1] > 1) - { - ClustSolStats = suppressWarnings(cluster.stats(eucledianM, k$cluster)) - avg.silwidth[count] = ClustSolStats$avg.silwidth - } else { - avg.silwidth[count] = 0 - } - } - ##Select best cluster width### - avg.silwidthINT = max(which(avg.silwidth == max(na.omit(avg.silwidth)))) - avg.silwidthClust = ks[avg.silwidthINT] - finalinterval <- - seq(Kintervalstart, Kintervalend, Kinterval)[avg.silwidthINT] - finalk <- kmeans(cnv_matrix, - ##center assignment## - matrix(rep( - seq(0, finalinterval * totalcopystate , by = finalinterval), - ncol(cnv_matrix) - ), ncol = ncol(cnv_matrix)), - iter.max = 100, - algorithm = "Forgy") - ##Make file to output K's## - KclusterAsGenotype = cbind(t(as.data.frame(list( - c( - ID = cnvID, - Chr = chr, - Start = start, - End = end - ) - ))), t(as.matrix(finalk$cluster))) - ##Count the number of copy states per SV## - Kclustercount=matrix(c(KclusterAsGenotype[,1:4],length(unique(KclusterAsGenotype[,5:ncol(KclusterAsGenotype)]))),nrow=1) - colnames(Kclustercount)<-c("ID","Chr","Start","End","N_CopyStates") - if(file.exists(paste(outFolder,outputname,".clustercount",sep=""))) { - #write.table(KclusterAsGenotype,paste(outFolder,outputname,".K",sep=""),quote=FALSE,append=TRUE,row.names=FALSE,col.names=FALSE,sep= "\t") - write.table(Kclustercount,paste(outFolder,outputname,".clustercount",sep=""),quote=FALSE,append=TRUE,row.names=FALSE,col.names=FALSE,sep= "\t") - } else { - #write.table(KclusterAsGenotype,paste(outFolder,outputname,".K",sep=""),quote=FALSE,row.names=FALSE,sep= "\t") - write.table(Kclustercount,paste(outFolder,outputname,".clustercount",sep=""),quote=FALSE,row.names=FALSE,sep= "\t") - } - - return(KclusterAsGenotype) - } - -#Seperate Samples into either Control or Treat group -#Number of bins assessed is dependent on SV sample size -create_groups <- function(genotype_matrix, cnv_matrix) -{ - ##Remove outer two bins which tend to be noisy if CNV large enough## - if (ncol(cnv_matrix) == 1) { - ##a start bin, b end bin## - Control <-cnv_matrix[which(genotype_matrix[, 5:ncol(genotype_matrix)] == 2), 1] - Treat <- cnv_matrix[which(genotype_matrix[, 5:ncol(genotype_matrix)] != 2), 1] - a<- 1 - b<- 1 - } else if (ncol(cnv_matrix) > 1 && ncol(cnv_matrix) < 4) { - a <- 1 - b <- ncol(cnv_matrix) - Control <- apply(cnv_matrix[which(genotype_matrix[, 5:ncol(genotype_matrix)] == 2), a:b, drop = F], 1, median) - Treat <-apply(cnv_matrix[which(genotype_matrix[, 5:ncol(genotype_matrix)] != 2), a:b, drop =F], 1, median) - } else { - a <- 2 - b <- ncol(cnv_matrix) - 1 - Control <- apply(cnv_matrix[which(genotype_matrix[, 5:ncol(genotype_matrix)] == 2), a:b, drop = F], 1, median) - Treat <- apply(cnv_matrix[which(genotype_matrix[, 5:ncol(genotype_matrix)] != 2), a:b, drop = F], 1, median) - } - output <- list(Control, Treat,a,b) 
- names(output) <- c("Control", "Treat","a","b") - return(output) -} - -#Power -powerCalc <- function(genotype_matrix, cnv_matrix) -{ - #Call Treat (have SV) and Control Groups - Control<-create_groups(genotype_matrix, cnv_matrix)$Control - Treat<-create_groups(genotype_matrix, cnv_matrix)$Treat - if (length(Control) > 1 && length(Treat) > 1) - { - #doesn't matter less or greater since absolute deviation, use 0.5 diffrence in mean between CNV to estimate effect size - power <- pwr.t2n.test(n1 = length(Control), n2 = length(Treat), sig.level = 0.05, - alternative = "greater", d = (0.5 / sd(Control)))$power - } else { - power <- NA - } - return(power) -} - -#OneSamplezscore to test single sample against everyone else; Can specifiy list of samples to exclude from analysis which may have CNV## -#samples exclude should be comma delimited list -#singlesample being assessed must not be normal(cn=2) in the genotype matrix### -onesamplezscore.median <- function(genotype_matrix,cnv_matrix,singlesample,cnvtype) -{ - #Call Treat (have SV) and Control Groups - Control<-create_groups(genotype_matrix, cnv_matrix)$Control - Treat<-create_groups(genotype_matrix, cnv_matrix)$Treat - Treat<-Treat[singlesample] - a<-create_groups(genotype_matrix, cnv_matrix)$a - b<-create_groups(genotype_matrix, cnv_matrix)$b - ##Calculate one-sided z score## - if (toupper(cnvtype) == "DEL") { - ztest.p <- pnorm((Treat - mean(Control)) / sd(Control)) - } else{ - ztest.p <- pnorm((mean(Control) - Treat) / sd(Control)) - } - ##Find the secondest worst p-value and record as an assement metric## - plist <- c() - i = 1 - for (column in a:b) - { - Control2 <- - cnv_matrix[which(genotype_matrix[, 5:ncol(genotype_matrix)] == 2), column] - Treat2 <- - cnv_matrix[singlesample, column] - if (toupper(cnvtype) == "DEL") { - single.p <- pnorm((Treat2 - mean(Control2)) / sd(Control2)) - } else { - single.p <- pnorm((mean(Control2) - Treat2) / sd(Control2)) - } - #store diffrent z p-value by column## - plist[i] <- single.p - i = i + 1 - } - if (length(plist) > 1) - { - mySecondMaxP <- sort(plist)[length(plist) - 1] - } else { - ##Note if only one bin, that bin is assigned as the second max P### - mySecondMaxP <- plist[1] - } - output <- list(ztest.p, mySecondMaxP) - names(output) <- c("singleZ_Pvalue", "Pmax_2nd") - return(output) -} - -#twosamplet t-test -twosamplezscore.median <- function(genotype_matrix,cnv_matrix,cnvtype) -{ - #Call Treat (have SV) and Control Groups - Control<-create_groups(genotype_matrix, cnv_matrix)$Control - Treat<-create_groups(genotype_matrix, cnv_matrix)$Treat - a<-create_groups(genotype_matrix, cnv_matrix)$a - b<-create_groups(genotype_matrix, cnv_matrix)$b - if (toupper(cnvtype) == "DEL") { - P_object <- permTS(Control, Treat, alternative = "greater", method = 'pclt')$p.value - } else{ P_object <- permTS(Control, Treat, alternative = "less", method = 'pclt')$p.value } - - ##Find the secondest worst p-value and record as an assement metric# - plist<-c() - i=1 - for (column in a:b) - { - Control2 <- cnv_matrix[which(genotype_matrix[, 5:ncol(genotype_matrix)] == 2), column] - Treat2 <- cnv_matrix[which(genotype_matrix[, 5:ncol(genotype_matrix)]!=2), column] - if (toupper(cnvtype) == "DEL") { - singlep <- permTS(Control2, Treat2, alternative = "greater", method = 'pclt')$p.value - } else{ - singlep <- permTS(Control2, Treat2, alternative = "less", method = 'pclt')$p.value - } - #store diffrent z p-value by column## - plist[i] <- singlep - i=i+1 - } - if (length(plist) > 1) - { - mySecondMaxP <- 
sort(plist)[length(plist) - 1] - } else { - ##Note if only one bin, that bin is assigned as the second max P### - mySecondMaxP <- plist[1] - } - output<-list(P_object, mySecondMaxP) - names(output)<-c("Pvalue","Pmax_2nd") - return(output) -} - -##Provide a depth based rank of the sample## -##sample you want to pull out information, if NULL than will do treat vs control## -samprank_sep <- function(genotype_matrix,cnv_matrix,cnvtype,sample=NULL) -{ - #Call Treat (have SV) and Control Groups - Control<-create_groups(genotype_matrix, cnv_matrix)$Control - Treat<-create_groups(genotype_matrix, cnv_matrix)$Treat - combined<-c(Treat,Control) - if (toupper(cnvtype) == "DEL") - { - order.rank <- median(rank(combined)[1:length(Treat)]) - ##Seperation between Treatment and Control groups - Sep = median(Control) - median(Treat) - } else { - order.rank <- - length(combined) - median(rank(combined)[1:length(Treat)]) + 1 - Sep = median(Treat) - median(Control) - } - ##allows to specify individaul sample to test### - if (!is.null(sample)) { - if (toupper(cnvtype) == "DEL") - { - order.rank = unname(rank(combined)[sample]) - Sep = median(Control) - median(combined[sample]) - } else { - order.rank <- length(combined) - unname(rank(combined)[sample]) + 1 - Sep = median(combined[sample]) - median(Control) - } - } - #If mosaic flag then replace median sep test with a max test## - if (opt$mosaicsep == TRUE) { - a <- create_groups(genotype_matrix, cnv_matrix)$a - b <- create_groups(genotype_matrix, cnv_matrix)$b - seplist <- c() - i = 1 - for (column in a:b) - { - Control2 <- cnv_matrix[which(genotype_matrix[, 5:ncol(genotype_matrix)] == 2), column] - Treat2 <- cnv_matrix[which(genotype_matrix[, 5:ncol(genotype_matrix)]!=2), column] - combined2<-c(Treat2,Control2) - if (toupper(cnvtype) == "DEL") - { - Sep = median(Control2) - median(Treat2) - } else { - Sep = median(Treat2) - median(Control2) - } - if (!is.null(sample)) { - if (toupper(cnvtype) == "DEL") - { - Sep = median(Control2) - median(combined2[sample]) - } else { - Sep = median(combined2[sample]) - median(Control2) - } - } - seplist[i] <- Sep - i = i + 1 - } - Sep <- max(seplist) - } - output <- list(order.rank, Sep) - names(output) <- c("rank", "Sep") - return(output) -} - -##Plot of intensities across cohorts## -plotJPG <- function(genotype_matrix,cnv_matrix,chr,start,end,cnvID,sampleIDs,outputname,cnvtype,plotK,plotfamily,famfile,outFolder) -{ - samplesPrior <- unlist(strsplit(as.character(sampleIDs),",")) - samplenames<-colnames(genotype_matrix) - - ##If only one bin## - if(ncol(cnv_matrix)==1) - {cnv_matrix<-cbind(cnv_matrix,cnv_matrix[,1])} - - ##File Output## - jpeg(paste(outFolder,chr,"_",start,"_",end,"_",samplesPrior[1],"_",cnvID,"_",outputname,".jpg",sep=""),res=300, width=1800, height=1800) - - ##concatenate sample IDs if necessary## - sampleIDs<-paste(sampleIDs,collapse=",") - - ##Limits number of sample Ids due to size limiations for readablity - if(nchar(as.character(sampleIDs))>44){sampleIDsToDisplay<-paste(substr(sampleIDs,1,44),"...",sep="")}else{sampleIDsToDisplay<-sampleIDs} - ##Title line 1## - main1=paste(chr,":",prettyNum(start,big.mark=","),"-",prettyNum(end,big.mark=",")," (hg19)",sep="") - - ###Add proper size abbr. 
for larger events - size=end-start - if(size<10000){mysize<-prettyNum(paste("(",size," bp)",sep=""), big.mark = ",")} - if(size>=10000){mysize<-prettyNum(paste("(",signif(size/1000,3)," kb)",sep=""), big.mark = ",")} - if(size>=1000000){mysize<-prettyNum(paste("(",signif(size/1000000,3)," Mb)",sep=""), big.mark = ",")} - - ##Formating## - main2 = paste(sampleIDsToDisplay, " ", mysize, sep ="") - mainText = paste(main1, "\n", main2, sep = "") - maxcexXa <- par('pin')[1] / strwidth(main1, 'inches') - maxcexXb <- par('pin')[1] / strwidth(main2, 'inches') - maxcexXh <- min(maxcexXa, maxcexXb) - if (maxcexXh < 0.5) { - maxcexXh = 0.5 - } - if (maxcexXh > 2.5) { - maxcexXh = 2.5 - } - par(mar = c(6.1, 6.1, 4.1, 2.1)) - - ##Create matrix for plotting### - columnstoshift <- which(rownames(cnv_matrix) %in% samplesPrior) - ##Place Samples with CNV on top### - plot_cnvmatrix<-cbind(t(cnv_matrix)[,-columnstoshift],t(cnv_matrix)[,columnstoshift]) - ##column shift diffrent for genotype matrix because cnvID,chr,start,end - columnstoshift <- which(colnames(genotype_matrix) %in% unlist(strsplit(as.character(samplesPrior),split=","))) - plot_colormatrix<-cbind(matrix(genotype_matrix[,-columnstoshift],nrow=1),matrix(genotype_matrix[,columnstoshift],nrow=1)) - endcolnormal<-ncol(plot_colormatrix)-(length(samplesPrior)) - plot_linematrix<-cbind(matrix(genotype_matrix[,-columnstoshift],nrow=1),matrix(genotype_matrix[,columnstoshift],nrow=1)) - - ##Blue if Dup; Red if Del - if ( plotK == TRUE ) { - #keep plot_colormatrix - main1=paste(chr,":",prettyNum(start,big.mark=","),"-",prettyNum(end,big.mark=",")," (hg19)",sep="") - mainText = paste(main1, "\n", "Copy Estimate"," ", mysize, sep = "") - plot_linematrix[,5:ncol(plot_linematrix)]<-"0.5" - } else if (toupper(cnvtype) == "DEL") { - plot_colormatrix[, (endcolnormal + 1):ncol(plot_colormatrix)] <- "red" - plot_colormatrix[,5:endcolnormal]<-"grey" - plot_linematrix[, (endcolnormal + 1):ncol(plot_colormatrix)] <- "3" - plot_linematrix[,5:endcolnormal]<-"0.5" - } else if (toupper(cnvtype) == "DUP") { - plot_colormatrix[, (endcolnormal + 1):ncol(plot_colormatrix)] <- "blue" - plot_colormatrix[,5:endcolnormal]<-"grey" - plot_linematrix[, (endcolnormal + 1):ncol(plot_colormatrix)] <- "3" - plot_linematrix[,5:endcolnormal]<-"0.5" - } - - ##Plotting Command## - plot(as.zoo(plot_cnvmatrix), - plot.type = "single", - col = plot_colormatrix[1, 5:ncol(plot_colormatrix)], - main = mainText, - cex.main = maxcexXh, - xlab = "Position (bp)", - xaxt = 'n', - ann = FALSE, - ylab = "Intensity", - lwd = plot_linematrix[1, 5:ncol(plot_linematrix)] - ) - mtext( - side = 1, - text = paste( chr, " Position (bp)", sep = ""), - line = 5 - ) - mtext(side = 2, text = "Normalized Read Depth Ratio", line = 3) - myIntervalsXAxis <- round(seq(start,end,length.out=ncol(cnv_matrix))) - axis(1,at=seq(1,ncol(cnv_matrix),by=1) ,labels = prettyNum(myIntervalsXAxis, big.mark = ","),las = 2,cex.axis = 0.8) - ##Family-Based Plotting## - if (plotfamily == TRUE ) { - ##Call familes to plot### - ##May have issues with multi-generation pedigress, Designed for Quad and Trio Families## - family <- read.table(famfile) - includedfams <- - unique(family[which(family[, 2] %in% samplesPrior), 1]) - proband_list <-as.character( - family[which(family[, 1] %in% includedfams & - family[, 3] != 0 & - family[, 4] != 0 & family[, 6] == 2) , 2]) - sib_list <-as.character( - family[which(family[, 1] %in% includedfams & - family[, 3] != 0 & - family[, 4] != 0 & family[, 6] == 1) , 2]) - father_list <-as.character( - 
family[which(family[, 1] %in% includedfams & - family[, 3] == 0 & - family[, 4] == 0 & family[, 5] == 1 & family[, 6] == 1) , 2]) - mother_list <-as.character( - family[which(family[, 1] %in% includedfams & - family[, 3] == 0 & - family[, 4] == 0 & family[, 5] == 2 & family[, 6] == 1) , 2]) - - text(c(1:10), as.numeric(cnv_matrix[proband_list,]), "p", cex = 1) - text(c(1:10), as.numeric(cnv_matrix[sib_list,]), "s", cex = 1) - text(c(1:10), as.numeric(cnv_matrix[father_list,]), "fa", cex = 1) - text(c(1:10), as.numeric(cnv_matrix[mother_list,]), "mo", cex = 1) - } - if (plotK == TRUE) { - copy_states = as.numeric(unique(plot_colormatrix[1,5:ncol(plot_colormatrix)])) - legend( - ifelse(toupper(cnvtype) == "DEL", 'topright', 'bottomright'), - paste("CN", sort(copy_states-1)) , - lty = 1, - col = sort(copy_states), - cex = .3 - ) - } else if (toupper(cnvtype) == "DEL") { - legend( - 'topright', - c("Deletion", "Diploid"), - lty = 1, - col = c("red", "grey"), - cex = .5 - ) - } else { - legend( - 'topright', - c("Diploid", "Duplication"), - lty = 1, - col = c("grey", "blue"), - cex = .5 - ) - } - dev.off() -} - -##Provide genotype for VCF format## -genotype<- function(cnv_matrix,genotype_matrix,refgeno,chr,start,end,cnvID,sampleIDs,cnvtype,outFolder,outputname,plot_cnvmatrix) -{ - ##get depth intensities## - cnv_median <-c(create_groups(genotype_matrix, cnv_matrix)$Control,create_groups(genotype_matrix, cnv_matrix)$Treat) - ##order by names so same geno output for each variant## - cnv_median<-cnv_median[order(names(cnv_median))] - cutoff_table <-read.table(refgeno, header = TRUE) - cutoffs <-unlist(cutoff_table[,4]) - ##assign copy states## - prev_cutoff=0 - max_medianstate<-max(cnv_median) - copystate<-cnv_median - for (i in 0:(length(cutoffs)-1)) { - copystate[which(cnv_median <= cutoffs[i+1] & cnv_median > prev_cutoff) ] <- i - prev_cutoff=cutoffs[i+1] - } - ##assign states above our cutoffs (0.25 increments)## - prev_cutoff=cutoffs[length(cutoffs)] - if (max_medianstate>cutoffs[length(cutoffs)]) { - for (i in (length(cutoffs):(ceiling(max_medianstate)/0.5))) { - copystate[which(cnv_median <= (i*0.5)+0.25 & cnv_median > prev_cutoff) ] <- i - prev_cutoff=(i*0.5)+0.25 - cutoff_table[i+1,]<-c(i,i*0.5,0,(i*0.5)+0.25) - } - } - - copystate.table<-table(unlist(copystate)) - - max_state<-names(copystate.table[order(copystate.table,decreasing=TRUE)][1]) - - correction<-cutoff_table[which(cutoff_table[,1]==max_state),2]-median(unlist(cnv_median[which(copystate==max_state)])) - - ##reassign copy state after correction## - if (opt$geno_adjust==TRUE) - { - cnv_median<-cnv_median+correction - ##anything negative because correction is reassignd 0## - cnv_median[which(cnv_median<0)]<-0 - ##do not adjust homozgous deletions upward## - if (correction>0){ - cnv_median[which(cnv_median-correction prev_cutoff) ] <- i - prev_cutoff=cutoffs[i+1] - } - ##assign states above our cutoffs (0.25 increments)## - prev_cutoff=cutoffs[length(cutoffs)] - if (max_medianstate>cutoffs[length(cutoffs)]) { - for (i in (length(cutoffs):(ceiling(max_medianstate)/0.5))) { - copystate[which(cnv_median <= (i*0.5)+0.25 & cnv_median > prev_cutoff) ] <- i - prev_cutoff=(i*0.5)+0.25 - cutoff_table[i+1,]<-c(i,i*0.5,0,(i*0.5)+0.25) - } - } - } - - ##create GQ## - #sd from copy state to determine GQ## - rd_sd<-cutoff_table[which(cutoff_table[,1]==2),3] - meanstate<-copystate - meanstatelow<-copystate-1 - meanstatehigh<-copystate+1 - - ##if copy state is 0 than only look at 1 when comparing GQ## - meanstatelow[which(meanstatelow<0)]<-1 - - 
##add one more to max level of cutoff_table for GQ ## - maxrow=dim(cutoff_table)[1] - ##add 1 to maxrow because table starts at 0## - cutoff_table[maxrow+1,]<-c(maxrow,maxrow*0.5,0,(maxrow*0.5)+0.25) - - for (i in 0:(dim(cutoff_table)[1]-2)) { - mean<-cutoff_table[which(cutoff_table[,1]==i),2] - meanstatelow[which(meanstatelow==i)]<-mean - meanstatehigh[which(meanstatehigh==i)]<-mean - meanstate[which(meanstate==i)]<-mean - } - - - ztest_matrix<-apply(matrix(abs(cnv_median-meanstate)/rd_sd,nrow=1),2,function(x) -10*log10(1-pnorm(x) )) - ztestlow_matrix<-apply(matrix(abs(cnv_median-meanstatelow)/rd_sd,nrow=1),2,function(x) -10*log10(1-pnorm(x) )) - ztesthigh_matrix<-apply(matrix(abs(cnv_median-meanstatehigh)/rd_sd,nrow=1),2,function(x) -10*log10(1-pnorm(x) )) - - ##just in cast ztest_matix is Inf need to reassign to something high so subtraction below does not fail## - ztest_matrix[which(ztest_matrix==Inf)]<-1000000 - - gq_value<-round(pmin(ztestlow_matrix,ztesthigh_matrix)-ztest_matrix) - - ##Cap max gq at 999## - gq_value[which(gq_value>999)]<-999 - gq_value[which(gq_value<0)]<-1 - - ##per variant gq## - ##median z test compared to a ref copystate## - if(length(cnv_median[which(copystate!=2)])>1) { - ztest_median<-median(abs(cnv_median[which(copystate!=2)]-1)/rd_sd) - gq_var<- round(-10*log10(1-pnorm(ztest_median))) - if(gq_var>999) {gq_var<-999} - if(gq_var<0) {gq_var<-1} - ##assign anything without a call as having a gq of 0### - } else {gq_var<-0} - ##write out cnv medians for each sample## - if(!file.exists(paste(outFolder,outputname,".median_geno",sep=""))) { - ##write header## - write.table(matrix(c("chr","start","end","cnvID",names(cnv_median)),nrow=1),paste(outFolder, outputname, ".median_geno", sep = ""),quote=FALSE,row.names=FALSE,col.names=FALSE,sep= "\t") - } - write.table(matrix(c(chr, start, end, cnvID,cnv_median),nrow=1),paste(outFolder, outputname, ".median_geno", sep = ""), - quote = FALSE,col.names = FALSE, row.names = FALSE,append=TRUE,sep= "\t") - - ##write GQ## - if(!file.exists(paste(outFolder,outputname,".gq",sep=""))) { - ##write header## - write.table(matrix(c("chr","start","end","cnvID",names(cnv_median)),nrow=1),paste(outFolder, outputname, ".gq", sep = ""),quote=FALSE,row.names=FALSE,col.names=FALSE,sep= "\t") - } - write.table(matrix(c(chr, start, end, cnvID,gq_value),nrow=1),paste(outFolder, outputname, ".gq", sep = ""), - quote = FALSE,col.names = FALSE, row.names = FALSE,append=TRUE,sep= "\t") - - ##write variant GQ## - if(!file.exists(paste(outFolder,outputname,".vargq",sep=""))) { - ##write header## - write.table(matrix(c("chr","start","end","cnvID","variant_gq"),nrow=1),paste(outFolder, outputname, ".vargq", sep = ""),quote=FALSE,row.names=FALSE,col.names=FALSE,sep= "\t") - } - write.table(matrix(c(chr, start, end, cnvID,gq_var),nrow=1),paste(outFolder, outputname, ".vargq", sep = ""), - quote = FALSE,col.names = FALSE, row.names = FALSE,append=TRUE,sep= "\t") - - ##write genotype## - - if(!file.exists(paste(outFolder,outputname,".geno",sep=""))) { - ##write header## - write.table(matrix(c("chr","start","end","cnvID",names(copystate)),nrow=1),paste(outFolder, outputname, ".geno", sep = ""),quote=FALSE,row.names=FALSE,col.names=FALSE,sep= "\t") - } - write.table(matrix(c(chr, start, end, cnvID,copystate),nrow=1),paste(outFolder, outputname, ".geno", sep = ""), - quote = FALSE,col.names = FALSE, row.names = FALSE,append=TRUE,sep= "\t") - - ##plot genotypes## - if (opt$plotK==TRUE) { - ##plotting expect copy state >1 than predicted because kmeans code, this 
is corrected in final plot## - plot_matrix<-matrix(c(cnvID,chr, start, end,copystate+1),nrow=1) - colnames(plot_matrix)<-colnames(genotype_matrix) - plotJPG(plot_matrix,plot_cnvmatrix,chr,start,end,cnvID,sampleIDs,outputname,cnvtype,plotK=TRUE,plotfamily=FALSE,famfile,outFolder) - } - -} - -runRdTest<-function(bed) -{ - chr<-as.character(bed[1]) - start<-as.numeric(bed[2]) - end<-as.numeric(bed[3]) - cnvID<-as.character(bed[4]) - sampleIDs<-as.character(bed[5]) - sampleOrigIDs<-as.character(bed[5]) - cnvtype<-as.character(bed[6]) - cnvtypeOrigIDs<-as.character(bed[6]) - - ##Assign input values from opt list to variable## - for (names in names(opt)) - { - assign(names,unname(unlist(opt[names]))) - } - #Speed up large cnvs by taking inner range of laregest desired size - if (end - start > sizefilter) - { - cat(paste(chr,":",start,"-",end,":Large size so subsampling in middle\n",sep="")) - center=(start + end) / 2 - start = round(center - (sizefilter/2)) - end = round(center + (sizefilter/2)) - } - - if (end - start <= 0 ) - { - end=start+1 - } - ##Make sure region is in tabix## - - - ##Get Intesity Data## - if (exists("poorbincov")) { - cnv_matrix<-loadData(chr, start, end, cnvID, sampleIDs,coveragepath,medianfile,bins,poorbincov) - } else { - cnv_matrix<-loadData(chr, start, end, cnvID, sampleIDs,coveragepath,medianfile,bins) - } - - if (cnv_matrix[1]=="Failure") { - ##assign genotype if no coverage## - if (opt$rungenotype == TRUE && !is.null(opt$Whitelist)) { - samplesWhitelist <- readLines(opt$Whitelist) - ##make dots to indicate missing genotype or GQ## - dotlist<- samplesWhitelist - dotlist[1:length(dotlist)]<-"." - ##write out cnv medians for each sample (requires whitelist)## - if(!file.exists(paste(outFolder,outputname,".median_geno",sep=""))) { - ##write header## - write.table(matrix(c("chr","start","end","cnvID",samplesWhitelist),nrow=1),paste(outFolder, outputname, ".median_geno", sep = ""),quote=FALSE,row.names=FALSE,col.names=FALSE,sep= "\t") - } - write.table(matrix(c(chr, start, end, cnvID,dotlist),nrow=1),paste(outFolder, outputname, ".median_geno", sep = ""), - quote = FALSE,col.names = FALSE, row.names = FALSE,append=TRUE,sep= "\t") - - ##write GQ## - if(!file.exists(paste(outFolder,outputname,".gq",sep=""))) { - ##write header## - write.table(matrix(c("chr","start","end","cnvID",samplesWhitelist),nrow=1),paste(outFolder, outputname, ".gq", sep = ""),quote=FALSE,row.names=FALSE,col.names=FALSE,sep= "\t") - } - write.table(matrix(c(chr, start, end, cnvID,dotlist),nrow=1),paste(outFolder, outputname, ".gq", sep = ""), - quote = FALSE,col.names = FALSE, row.names = FALSE,append=TRUE,sep= "\t") - - ##write variant GQ## - if(!file.exists(paste(outFolder,outputname,".vargq",sep=""))) { - ##write header## - write.table(matrix(c("chr","start","end","cnvID","variant_gq"),nrow=1),paste(outFolder, outputname, ".vargq", sep = ""),quote=FALSE,row.names=FALSE,col.names=FALSE,sep= "\t") - } - write.table(matrix(c(chr, start, end, cnvID,"."),nrow=1),paste(outFolder, outputname, ".vargq", sep = ""), - quote = FALSE,col.names = FALSE, row.names = FALSE,append=TRUE,sep= "\t") - - ##write genotype## - - if(!file.exists(paste(outFolder,outputname,".geno",sep=""))) { - ##write header## - write.table(matrix(c("chr","start","end","cnvID",samplesWhitelist),nrow=1),paste(outFolder, outputname, ".geno", sep = ""),quote=FALSE,row.names=FALSE,col.names=FALSE,sep= "\t") - } - write.table(matrix(c(chr, start, end, cnvID,dotlist),nrow=1),paste(outFolder, outputname, ".geno", sep = ""), - quote = 
FALSE,col.names = FALSE, row.names = FALSE,append=TRUE,sep= "\t") - } - return(c(chr,start,end,cnvID,sampleOrigIDs,cnvtypeOrigIDs,"coverage_failure","coverage_failure","coverage_failure","coverage_failure","coverage_failure","coverage_failure")) - } - - ##remove black or white list samples from sampleIDs### - idsforsearch<-rownames(cnv_matrix) - samplestokeep<-match(unlist(strsplit(sampleIDs,",")),idsforsearch) - sampleIDs<-idsforsearch[na.omit(samplestokeep)] - samplesPrior <-unlist(strsplit(as.character(sampleIDs),split=",")) - ##Run K Test if Specified## - if (opt$runKmeans == TRUE) { - k_matrix<-kMeans(cnv_matrix,chr,start,end,cnvID,Kinterval,Kintervalstart,Kintervalend,outFolder,outputname) - if (opt$plotK==TRUE) { - plotJPG(k_matrix,cnv_matrix,chr,start,end,cnvID,sampleIDs,outputname,cnvtype,plotK,plotfamily=FALSE,famfile,outFolder) - } - } - ##Assign intial genotypes (del=1,dup=3,diploid=2)## - genotype_matrix<-specified_cnv(cnv_matrix, sampleIDs, cnvID, chr, start, end, cnvtype) - ##check if no samples are found in genotype matrix## - if (as.matrix(genotype_matrix)[1,1]=="No_Samples") { - return(c(chr,start,end,cnvID,sampleOrigIDs,cnvtypeOrigIDs,"No_samples_for_analysis","No_samples_for_analysis","No_samples_for_analysis","No_samples_for_analysis","No_samples_for_analysis","No_samples_for_analysis")) - } - - ##genotype and write to file## - if (opt$rungenotype == TRUE) { - ##Compress x-axis to 10 bins so it is easier to view### - plot_cnvmatrix<-loadData(chr, start, end, cnvID, sampleIDs,coveragepath,medianfile,bins=10) - genotype(cnv_matrix,genotype_matrix,refgeno,chr,start,end,cnvID,sampleIDs,cnvtype,outFolder,outputname,plot_cnvmatrix) - } - - ##QC on filtered sample counts## - copystatecounts=table(genotype_matrix[1,5:ncol(genotype_matrix)]) - ##diploid count## - dipcount=copystatecounts["2"] - cnvcount=copystatecounts[ifelse(toupper(cnvtype)=="DEL","1","3")] - if (is.na(dipcount)){ - return(c(chr,start,end,cnvID,sampleOrigIDs,cnvtypeOrigIDs,"All_samples_called_CNV_no_analysis","All_samples_called_CNV_no_analysis","All_samples_called_CNV_no_analysis","All_samples_called_CNV_no_analysis","All_samples_called_CNV_no_analysis","All_samples_called_CNV_no_analysis")) - } - ##Plot JPG## - if (opt$plot == TRUE){ - plotJPG(genotype_matrix,cnv_matrix,chr,start,end,cnvID,sampleIDs,outputname,cnvtype,plotK=FALSE,plotfamily=FALSE,famfile,outFolder) - } - ##De Novo Module## - if (opt$denovo == TRUE) { - ##Read in family file## - family <- read.table(famfile) - child<-family[which(family[, 3] != 0 & family[, 4] != 0 ) , 2] - samplesPrior <- samplesPrior[which(samplesPrior %in% child)] - ##If ID only has parents or all children removed by filtering## - if (length(samplesPrior) == 0 ) { - denovo_output <- cbind(chr,start,end,cnvID,cnvtype,"No_samples_for_analysis","No_samples_for_analysis","No_samples_for_analysis","No_samples_for_analysis","No_samples_for_analysis","No_samples_for_analysis","No_samples_for_analysis","No_samples_for_analysis","No_samples_for_analysis","No_samples_for_analysis", - "No_samples_for_analysis","No_samples_for_analysis","No_samples_for_analysis","No_samples_for_analysis", - "No_samples_for_analysis","No_samples_for_analysis","No_samples_for_analysis","No_samples_for_analysis") - - return(denovo_output) - } - includedfams <- unique(family[which(family[, 2] %in% samplesPrior), 1]) - original_cnv_matrix<-cnv_matrix - original_genotype_matrix<-genotype_matrix - ##Find samples to include## - proband_list <-as.character( - family[which(family[, 1] %in% includedfams & 
family[, 3] != 0 & family[, 4] != 0 & family[, 6] == 2) , 2]) - sib_list <-as.character( - family[which(family[, 1] %in% includedfams & family[, 3] != 0 & family[, 4] != 0 & family[, 6] == 1) , 2]) - father_list <-as.character( - family[which(family[, 1] %in% includedfams & family[, 3] == 0 & family[, 4] == 0 & family[, 5] == 1 & family[, 6] == 1) , 2]) - mother_list <-as.character( - family[which(family[, 1] %in% includedfams & family[, 3] == 0 & family[, 4] == 0 & family[, 5] == 2 & family[, 6] == 1) , 2]) - for (mem in c("mo","p1","s1","fa")) { - eval(parse(text=paste(mem,".p.list<-c()",sep=""))) - eval(parse(text=paste(mem,".secmaxp.list<-c()",sep=""))) - eval(parse(text=paste(mem,".rankp.list<-c()",sep=""))) - eval(parse(text=paste(mem,".sepp.list<-c()",sep=""))) - } - affecteded_fam<-c() - count=0 - fam_denovo.matrix<-c(NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA) - for (i in includedfams) { - count = count + 1 - for (singlesample in na.omit(c(proband_list[count],sib_list[count],father_list[count],mother_list[count]))) { - if (singlesample %in% proband_list) {mem="p1"} - if (singlesample %in% sib_list) {mem="s1"} - if (singlesample %in% father_list) {mem="fa"} - if (singlesample %in% mother_list) {mem="mo"} - ##Add family members to sample include list## - sampleID1s = unique(c( - as.character(sampleIDs), - father_list[count], - mother_list[count], - sib_list[count], - proband_list[count])) - ##gender restrict variants on X or Y### - if ((chr == "X" || - chr == "Y") && - (family[which(family[, 2] == singlesample), 5] == 2)) { - cnv_matrix <- - as.matrix(cnv_matrix[rownames(cnv_matrix) %in% family[which(family[, 5] == 2), 2], ]) - } else if ((chr == "X" || - chr == "Y") && - (family[which(family[, 2] == singlesample), 5] == 1)) { - cnv_matrix <- - as.matrix(cnv_matrix[rownames(cnv_matrix) %in% family[which(family[, 5] == 1), 2], ]) - } - ##remove sample of interest from sample exclude list and make new genotype matrix## - genotype_matrix<-specified_cnv(cnv_matrix, sampleID1s, cnvID, chr, start, end, cnvtype) - ##remove singlesample for exclusion list## - p <-onesamplezscore.median(genotype_matrix,cnv_matrix,singlesample,cnvtype) - ##write meteric for each family member - eval(parse(text=paste(mem,".p.list[count]<-", p[1],sep=""))) - eval(parse(text=paste(mem,".secmaxp.list[count]<-", p[2],sep=""))) - rank_sep<-samprank_sep(genotype_matrix,cnv_matrix,cnvtype,singlesample) - eval(parse(text=paste(mem,".rankp.list[count]<-", rank_sep[1],sep=""))) - eval(parse(text=paste(mem,".sepp.list[count]<-", rank_sep[2],sep=""))) - cnv_matrix<-original_cnv_matrix - fam_denovo.matrix<-rbind(fam_denovo.matrix,matrix(unlist(c(chr,start,end,cnvID,singlesample,cnvtype,"NA",p[1],p[2],"singlesampZ",rank_sep[1],rank_sep[2])),nrow=1)) - } - affecteded_fam[count]<-paste(unique(grep(i,unlist(strsplit(as.character(sampleIDs),split=",")),value=TRUE)),collapse=",") - if (opt$plotfamily==TRUE) { - sampleID2s<-paste(unique(grep(i,unlist(strsplit(as.character(sampleIDs),split=",")),value=TRUE)),collapse=",") - plotJPG(original_genotype_matrix,original_cnv_matrix,chr,start,end,cnvID,sampleIDs=sampleID2s,outputname=paste(outputname,"_",i,sep=""),cnvtype,plotK=FALSE,plotfamily,famfile,outFolder) - } - } - if (opt$quartetDenovo==TRUE) { - denovo_output <- cbind(chr,start,end,cnvID,cnvtype,includedfams,affecteded_fam,p1.p.list,s1.p.list,fa.p.list,mo.p.list, - p1.secmaxp.list,s1.secmaxp.list,fa.secmaxp.list,mo.secmaxp.list, - p1.sepp.list,s1.sepp.list,fa.sepp.list,mo.sepp.list, - 
p1.rankp.list,s1.rankp.list,fa.rankp.list,mo.rankp.list) - } else { denovo_output<-fam_denovo.matrix[2:nrow(fam_denovo.matrix),] } - return(denovo_output) - } - - ##Flip samples and cnvtype to that with the lowest frequency## - if(dipcount 1,power,NA) - if (!is.na(power) && power > 0.8) { - p <- twosamplezscore.median(genotype_matrix, cnv_matrix, cnvtype) - p[3]<-"twoSampPerm" - names(p)<-c("Pvalue","Pmax_2nd","Test") - } else { - ##Need to break down underpowerd samples into multiple single z-tests## - p.list<-c() - p.2ndmax<-c() - count=0 - for (i in samplesPrior) { - count=count+1 - singlesample = i - p <-onesamplezscore.median(genotype_matrix,cnv_matrix,singlesample,cnvtype) - p.list[count]<-p[1] - p.2ndmax[count]<-p[2] - } - ##Combine individual P-values with fisher.method## - if (length(p.list) > 1) { - ##Need to change 0 to 1e-300 for sumlog function## - p.list<-rapply(p.list,function(x) ifelse(x==0,1e-300,x), how = "replace") - p.2ndmax<-rapply(p.2ndmax,function(x) ifelse(x==0,1e-300,x), how = "replace") - p <- list(sumlog(unlist(p.list))$p, sumlog(unlist(p.2ndmax))$p) - } else { - p <- c(p.list[1], p.2ndmax[1]) - } - p[3]<-"singlesampZ" - names(p)<-c("Pvalue","Pmax_2nd","Test") - } - rank_sep<-samprank_sep(genotype_matrix,cnv_matrix,cnvtype) - output=matrix(unlist(c(chr,start,end,cnvID,sampleOrigIDs,cnvtypeOrigIDs,power,p[1],p[2],p[3],rank_sep[1],rank_sep[2])),nrow=1) - return(output) -} - -##Wrapper## -#Loads regions: chr start end locusID sampleID1,sampleID2,... -intervals <- read.table(opt$bed, sep = "\t", header = F) - -#Make start and end numeric -intervals[, c(2:3)] <- - apply(intervals[, c(2:3)], 2, function(vals) { - return(as.numeric(as.character(vals))) - }) -intervals <- data.frame(lapply(intervals, as.character), stringsAsFactors=FALSE) -results<-apply(intervals,1,runRdTest) -if(class(results)=="list") { - results<-do.call(rbind,results) -} else { - results <- t(results) -} -#write ouputfile## -if (opt$denovo==FALSE) { - if(!file.exists(paste(opt$outFolder,opt$outputname,".metrics",sep=""))) { - ##write header## - write.table(matrix(c("chr","Start","End","CNVID","SampleIDs","Type","Median_Power","P","2ndMaxP","Model","Median_Rank","Median_Separation"),nrow=1),paste(opt$outFolder, opt$outputname, ".metrics", sep = ""),quote=FALSE,row.names=FALSE,col.names=FALSE,sep= "\t") - } - write.table(results,paste(opt$outFolder, opt$outputname, ".metrics", sep = ""), - quote = FALSE,col.names = FALSE, row.names = FALSE,append=TRUE,sep= "\t") -} else { - if(!file.exists(paste(opt$outFolder, opt$outputname,".denovo",sep=""))) { - ##write header for de novo## - if (opt$quartetDenovo==TRUE) { - write.table(matrix(c("chr","Start","End","CNVID","Type","Family","AffectedMember","Pro.P","Sib.P","Fa.P","Mo.P","Pro.secMaxP","Sib.secMaxP","Fa.secMaxP","Mo.secMaxP","Pro.Sep","Sib.Sep","Fa.Sep","Mo.Sep","Pro.rank","Sib.rank","Fa.rank","Mo.rank"),nrow=1),paste(opt$outFolder, opt$outputname, ".denovo", sep = ""),quote=FALSE,row.names=FALSE,col.names=FALSE,sep= "\t") - } else { write.table(matrix(c("chr","Start","End","CNVID","SampleIDs","Type","Median_Power","P","2ndMaxP","Model","Median_Rank","Median_Separation"),nrow=1),paste(opt$outFolder, opt$outputname, ".denovo", sep = ""),quote=FALSE,row.names=FALSE,col.names=FALSE,sep= "\t") } - } - write.table(results,paste(opt$outFolder, opt$outputname, ".denovo", sep = ""), - quote = FALSE,col.names = FALSE, row.names = FALSE,append=TRUE,sep= "\t") -} - -cat("FINISHED\n") diff --git a/dockerfiles/rdpesr/add_GC_anno_to_bed.R 
b/dockerfiles/rdpesr/add_GC_anno_to_bed.R deleted file mode 100755 index ee8d1c5de..000000000 --- a/dockerfiles/rdpesr/add_GC_anno_to_bed.R +++ /dev/null @@ -1,51 +0,0 @@ -#!/usr/bin/env Rscript -library("optparse") - -option_list = list( - make_option(c("-b", "--bedfile"), type="character", default=NULL, - help="name of input bed file", metavar="character"), - make_option(c("-o", "--output"), type="character", default=NULL, - help="name of output file", metavar="character"), - - make_option(c( "--left_vs_SR"), type="character", default=NULL, - help="", metavar="character"), - make_option(c( "--left_vs_SD"), type="character", default=NULL, - help="", metavar="character"), - make_option(c( "--left_vs_RM"), type="character", default=NULL, - help="", metavar="character"), - - make_option(c( "--right_vs_SR"), type="character", default=NULL, - help="", metavar="character"), - make_option(c( "--right_vs_SD"), type="character", default=NULL, - help="", metavar="character"), - make_option(c( "--right_vs_RM"), type="character", default=NULL, - help="", metavar="character") - ); - -opt_parser = OptionParser(option_list=option_list); -opt = parse_args(opt_parser); - -filename=opt$input - -dat=read.table(opt$bedfile,sep='\t') -sr_le=read.table(opt$left_vs_SR,sep='\t') -sd_le=read.table(opt$left_vs_SD,sep='\t') -rm_le=read.table(opt$left_vs_RM,sep='\t') - -sr_ri=read.table(opt$right_vs_SR,sep='\t') -sd_ri=read.table(opt$right_vs_SD,sep='\t') -rm_ri=read.table(opt$right_vs_RM,sep='\t') - -dat[,ncol(dat)+1] = 'US' -dat[dat[,4]%in%rm_le[,4],][,ncol(dat)]='RM' -dat[dat[,4]%in%rm_ri[,4] & !dat[,5]%in%c('INS','ALU','LINE1','MEI','SVA'),][,ncol(dat)]='RM' - -dat[dat[,4]%in%sd_le[,4],][,ncol(dat)]='SD' -dat[dat[,4]%in%sd_ri[,4] & !dat[,5]%in%c('INS','ALU','LINE1','MEI','SVA'),][,ncol(dat)]='SD' - -dat[dat[,4]%in%sr_le[,4],][,ncol(dat)]='SR' -dat[dat[,4]%in%sr_ri[,4] & !dat[,5]%in%c('INS','ALU','LINE1','MEI','SVA'),][,ncol(dat)]='SR' - -write.table(dat,opt$output, quote=F, sep='\t', col.names=F, row.names=F) - - diff --git a/dockerfiles/rdpesr/add_RD_to_SVs.py b/dockerfiles/rdpesr/add_RD_to_SVs.py deleted file mode 100755 index 15d2d7722..000000000 --- a/dockerfiles/rdpesr/add_RD_to_SVs.py +++ /dev/null @@ -1,75 +0,0 @@ -# script to add cov to SVs - -import os -import argparse -import numpy as np - - -def add_ILL_cov(pb_uni_svs, bincov): - for i in pb_uni_svs.keys(): - for j in pb_uni_svs[i]: - cov_list = cov_SV_readin(j, bincov) - if len(cov_list) > 0: - j += [len(cov_list), np.median(cov_list), - np.mean(cov_list), np.std(cov_list)] - else: - j += [0, 'nan', 'nan', 'nan'] - # print(j) - return pb_uni_svs - - -def bed_info_readin(input): - fin = open(input) - out = {} - for line in fin: - pin = line.strip().split() - if pin[0][0] == '#': - continue - if not pin[0] in out.keys(): - out[pin[0]] = [] - out[pin[0]].append([pin[0], int(pin[1]), int(pin[2])] + pin[3:]) - fin.close() - return out - - -def cov_SV_readin(svpos, bincov): - fin = os.popen(r'''tabix %s %s:%d-%d''' % - (bincov, svpos[0], svpos[1], svpos[2])) - normCov_list = [] - for line in fin: - pin = line.strip().split() - normCov_list.append(float(pin[-1])) - fin.close() - return normCov_list - - -def path_modify(path): - if not path[-1] == '/': - path += '/' - return path - - -def write_output(output, pb_uni_svs): - fo = open(output, 'w') - for k1 in pb_uni_svs.keys(): - for k2 in pb_uni_svs[k1]: - print('\t'.join([str(i) for i in k2]), file=fo) - fo.close() - - -def main(): - parser = argparse.ArgumentParser( - 
description='S2a.calcu.Seq_Cov.of.PB_Uni.py') - parser.add_argument( - 'input', help='name of input file containing PacBio unique SVs in bed format') - parser.add_argument( - 'bincov', help='name of bincov metrics of the sample to be processed') - parser.add_argument( - 'output', help='name of bincov metrics of the sample to be processed') - args = parser.parse_args() - pb_uni_svs = bed_info_readin(args.input) - pb_uni_svs = add_ILL_cov(pb_uni_svs, args.bincov) - write_output(args.output, pb_uni_svs) - - -main() diff --git a/dockerfiles/rdpesr/add_SR_PE_to_PB_INS.V2.py b/dockerfiles/rdpesr/add_SR_PE_to_PB_INS.V2.py deleted file mode 100755 index a0c8c4a43..000000000 --- a/dockerfiles/rdpesr/add_SR_PE_to_PB_INS.V2.py +++ /dev/null @@ -1,147 +0,0 @@ -import os - - -def INS_readin(filein): - fin = open(filein) - out = [] - for line in fin: - pin = line.strip().split() - if pin[0][0] == '#': - continue - # if pin[4]=='INS': - out.append(pin) - fin.close() - return out - - -def add_Num_SR_le(sr_index, info, flank_length=100): - # eg of info: ['chr1', '137221', '137339', 'HOM', 'INS'] - fin = os.popen(r'''tabix %s %s:%d-%d''' % (sr_index, - info[0], int(info[1]) - flank_length, int(info[1]) + flank_length)) - tmp = [] - for line in fin: - pin = line.strip().split() - tmp.append(pin) - fin.close() - if len(tmp) == 0: - return 0 - else: - return max([int(i[3]) for i in tmp]) - - -def add_Num_SR_ri(sr_index, info, flank_length=100): - # eg of info: ['chr1', '137221', '137339', 'HOM', 'INS'] - fin = os.popen(r'''tabix %s %s:%d-%d''' % (sr_index, - info[0], int(info[2]) - flank_length, int(info[2]) + flank_length)) - tmp = [] - for line in fin: - pin = line.strip().split() - tmp.append(pin) - fin.close() - if len(tmp) == 0: - return 0 - else: - return max([int(i[3]) for i in tmp]) - - -def add_Num_PE_le(pe_index, info, flank_length=300): - fin = os.popen(r'''tabix %s %s:%d-%d''' % (pe_index, - info[0], int(info[1]) - 2 * flank_length, int(info[1]) + flank_length)) - tmp = [] - for line in fin: - pin = line.strip().split() - if 'INS' in pin[4] or pin[4] in ['INS', 'ALU', 'LINE1', 'SVA']: - tmp.append(pin) - else: - if pin[0] == pin[3]: - if abs(int(pin[4]) - int(pin[1])) > 100 * (int(info[2]) - int(info[1])): - continue - else: - tmp.append(pin) - fin.close() - # if len(tmp)==0: - # return 0 - # else: - # cluster_hash= cluster_pe_mate(tmp) - # return cluster_hash[0] - return len(tmp) - - -def add_Num_PE_ri(pe_index, info, flank_length=300): - fin = os.popen(r'''tabix %s %s:%d-%d''' % (pe_index, - info[0], int(info[2]) - flank_length, int(info[2]) + 2 * flank_length)) - tmp = [] - for line in fin: - pin = line.strip().split() - if 'INS' in pin[4] or pin[4] in ['INS', 'ALU', 'LINE1', 'SVA']: - tmp.append(pin) - else: - if pin[0] == pin[3]: - if abs(int(pin[4]) - int(pin[1])) > 100 * (int(info[2]) - int(info[1])): - continue - else: - tmp.append(pin) - fin.close() - # if len(tmp)==0: - # return 0 - # else: - # cluster_hash= cluster_pe_mate(tmp) - # return cluster_hash[0] - return len(tmp) - - -def cluster_pe_mate(tmp): - out = {} - for i in tmp: - if not i[3] in out.keys(): - out[i[3]] = [] - out[i[3]].append(int(i[4])) - key_name = [i for i in out.keys()] - key_lengh = [len(out[i]) for i in key_name] - most_abundant = key_name[key_lengh.index(max(key_lengh))] - return [most_abundant, sorted(out[most_abundant])] - - -def write_Num_SR(info_list, fileout): - fo = open(fileout, 'w') - for i in info_list: - print('\t'.join([str(j) for j in i]), file=fo) - fo.close() - - -def main(): - import argparse - 
parser = argparse.ArgumentParser("add_SR_PE_to_PB_INS.py") - parser.add_argument('PB_bed', type=str, - help='name of input PacBio bed file') - parser.add_argument('pe_file', type=str, - help='name of pe files with index') - parser.add_argument('sr_file', type=str, - help='name of sr files with index') - parser.add_argument('output', type=str, - help='name of output files with index') - args = parser.parse_args() - filein = args.PB_bed - pe_index = args.pe_file - sr_index = args.sr_file - info_list = INS_readin(filein) - for i in info_list: - i += [add_Num_PE_le(pe_index, i)] - i += [add_Num_PE_ri(pe_index, i)] - if i[4] == 'INS' or i[4] == 'MEI': - i += [add_Num_SR_le(sr_index, i, 50)] - i += [add_Num_SR_ri(sr_index, i, 50)] - else: - if int(i[5]) < 300: - i += [add_Num_SR_le(sr_index, i, int(i[5]) / 2)] - i += [add_Num_SR_ri(sr_index, i, int(i[5]) / 2)] - else: - i += [add_Num_SR_le(sr_index, i, 150)] - i += [add_Num_SR_ri(sr_index, i, 150)] - i += [add_Num_SR_le(sr_index, i, 0)] - i += [add_Num_SR_ri(sr_index, i, 0)] - write_Num_SR(info_list, args.output) - - -if __name__ == '__main__': - main() diff --git a/dockerfiles/rdpesr/add_SR_PE_to_breakpoints.py b/dockerfiles/rdpesr/add_SR_PE_to_breakpoints.py deleted file mode 100755 index 9dbe207d5..000000000 --- a/dockerfiles/rdpesr/add_SR_PE_to_breakpoints.py +++ /dev/null @@ -1,75 +0,0 @@ -import os - - -def INS_readin(filein): - fin = open(filein) - out = [] - for line in fin: - pin = line.strip().split() - # if pin[4]=='INS': - out.append(pin) - fin.close() - return out - - -def add_Num_SR(sr_index, info, flank_length=50): - # eg of info: ['chr1', '137221', '137339', 'HOM', 'INS'] - fin = os.popen(r'''tabix %s %s:%d-%d''' % (sr_index, - info[0], int(info[1]) - flank_length, int(info[1]) + flank_length)) - tmp = [] - for line in fin: - pin = line.strip().split() - tmp.append(pin) - fin.close() - if len(tmp) == 0: - return 0 - else: - return max([int(i[3]) for i in tmp]) - - -def add_Num_PE(pe_index, info, flank_length=100): - fin = os.popen(r'''tabix %s %s:%d-%d''' % (pe_index, - info[0], int(info[1]) - flank_length, int(info[1]) + flank_length)) - tmp = [] - for line in fin: - pin = line.strip().split() - # if int(pin[2])-int(pin[1])>100*(int(info[2])-int(info[1])): continue - tmp.append(pin) - fin.close() - # if len(tmp)==0: - # return 0 - # else: - # cluster_hash= cluster_pe_mate(tmp) - # return cluster_hash[0] - return len(tmp) - - -def write_Num_SR(info_list, fileout): - fo = open(fileout, 'w') - for i in info_list: - print('\t'.join([str(j) for j in i]), file=fo) - fo.close() - - -def main(): - import argparse - parser = argparse.ArgumentParser("add_SR_PE_to_PB_INS.py") - parser.add_argument('PB_bed', type=str, - help='name of input PacBio bed file') - parser.add_argument('pe_file', type=str, - help='name of pe files with index') - # parser.add_argument('sr_file', type=str, help='name of sr files with index') - args = parser.parse_args() - filein = args.PB_bed - pe_index = args.pe_file - # sr_index = args.sr_file - info_list = INS_readin(filein) - for i in info_list: - # i+=[add_Num_SR(sr_index,i)] - i += [add_Num_PE(pe_index, i)] - print(i) - write_Num_SR(info_list, filein + '.with_INS_PE') - - -if __name__ == '__main__': - main() diff --git a/dockerfiles/rdpesr/bincov_to_normCov.R b/dockerfiles/rdpesr/bincov_to_normCov.R deleted file mode 100755 index aae75ef75..000000000 --- a/dockerfiles/rdpesr/bincov_to_normCov.R +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/env Rscript -library("optparse") - -option_list = list( - 
make_option(c("-i", "--input"), type="character", default=NULL, - help="name of input bincov tsv file", metavar="character") - ); - -opt_parser = OptionParser(option_list=option_list); -opt = parse_args(opt_parser); - -filein=opt$input -dat=read.table(filein) -median=median(dat[,4]) -dat[,4]=dat[,4]/median -write.table(dat, gsub('.gz','',gsub('bincov','normCov',filein)), quote=F, sep='\t', col.names=F, row.names=F) - - diff --git a/dockerfiles/rdpesr/calcu_inheri_stat.py b/dockerfiles/rdpesr/calcu_inheri_stat.py deleted file mode 100755 index 3ccba3afd..000000000 --- a/dockerfiles/rdpesr/calcu_inheri_stat.py +++ /dev/null @@ -1,108 +0,0 @@ -import pysam -import argparse - - -def calcu_inheri_hash(vcf_file, fam_file): - fam_info = trio_info_readin(fam_file) - fvcf = pysam.VariantFile(vcf_file) - inheri_hash = {} - for child in fam_info.keys(): - inheri_hash[child] = [] - for record in fvcf: - print(record.id) - for child in fam_info.keys(): - trio = fam_info[child] + [child] - trio_len = sum( - [1 if i in record.samples.keys() else 0 for i in trio]) - if trio_len == 3: - gt = [record.samples[i]['GT'] for i in trio] - if (None, None) in gt: - continue - if gt == [(0, 0), (0, 0), (0, 0)]: - continue - else: - if gt[1] == (0, 0) and not gt[0] == (0, 0) and not gt[2] == (0, 0): - inheri_hash[child].append( - ['fa_pb', record.info['SVTYPE']]) - if gt[0] == (0, 0) and not gt[1] == (0, 0) and not gt[2] == (0, 0): - inheri_hash[child].append( - ['mo_pb', record.info['SVTYPE']]) - if not gt[0] == (0, 0) and not gt[1] == (0, 0) and not gt[2] == (0, 0): - inheri_hash[child].append( - ['fa_mo_pb', record.info['SVTYPE']]) - if gt[0] == (0, 0) and gt[1] == (0, 0) and not gt[2] == (0, 0): - inheri_hash[child].append( - ['denovo', record.info['SVTYPE']]) - fvcf.close() - return inheri_hash - - -def inheri_hash_to_stat(inheri_hash): - inheri_stat = {} - for child in inheri_hash.keys(): - inheri_stat[child] = {} - for rec in inheri_hash[child]: - if not rec[1] in inheri_stat[child].keys(): - inheri_stat[child][rec[1]] = {} - if not rec[0] in inheri_stat[child][rec[1]].keys(): - inheri_stat[child][rec[1]][rec[0]] = 0 - inheri_stat[child][rec[1]][rec[0]] += 1 - return inheri_stat - - -def trio_info_readin(fam_file): - fam_info = {} - fin = open(fam_file) - for line in fin: - pin = line.strip().split() - if pin[2] == '0' and pin[3] == '0': - continue - if not pin[1] in fam_info.keys(): - fam_info[pin[1]] = pin[2:4] - fin.close() - return fam_info - - -def unique_list(list): - out = [] - for i in list: - if i not in out: - out.append(i) - return out - - -def write_output_stat(fileout, inheri_stat): - fo = open(fileout, 'w') - print('\t'.join(['sample', 'svtype', 'fa_mo_pb', - 'fa_pb', 'mo_pb', 'denovo']), file=fo) - for samp in inheri_stat.keys(): - for svt in inheri_stat[samp].keys(): - tmp = [] - for inh in ['fa_mo_pb', 'fa_pb', 'mo_pb', 'denovo']: - if inh in inheri_stat[samp][svt].keys(): - tmp.append(inheri_stat[samp][svt][inh]) - else: - tmp.append(0) - print('\t'.join([str(i) for i in [samp, svt] + tmp]), file=fo) - fo.close() - - -def main(): - parser = argparse.ArgumentParser("GATK-SV.S1.vcf2bed.py") - parser.add_argument('fam_file', type=str, help='fam / ped file') - parser.add_argument('vcf_file', type=str, help='vcf file') - parser.add_argument('inheri_stat', type=str, help='name of output stat') - args = parser.parse_args() - # read_write_basic_vcf(args.vcfname,args.bedname) - fam_file = args.fam_file - vcf_file = args.vcf_file - fileout = args.inheri_stat - # readin fam information - # only 
complete trios would be read in here - inheri_hash = calcu_inheri_hash(vcf_file, fam_file) - inheri_stat = inheri_hash_to_stat(inheri_hash) - write_output_stat(fileout, inheri_stat) - - -if __name__ == '__main__': - main() diff --git a/dockerfiles/rdpesr/install_deprecated_R_package.sh b/dockerfiles/rdpesr/install_deprecated_R_package.sh deleted file mode 100644 index 4184d40c2..000000000 --- a/dockerfiles/rdpesr/install_deprecated_R_package.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/bash -set -eu -o pipefail - -MODULE_ARCHIVE_URL=$1 - -ARCHIVE_DIR=$(mktemp -d "${TMPDIR:-/tmp/}$(basename $0).XXXXXXXXXXXX") -trap "rm -rf $ARCHIVE_DIR" EXIT - -MODULE_ARCHIVE_DEST="$ARCHIVE_DIR/$(basename "$MODULE_ARCHIVE_URL")" -curl "$MODULE_ARCHIVE_URL" --output "$MODULE_ARCHIVE_DEST" -Rscript -e "install.packages('$MODULE_ARCHIVE_DEST', repos = NULL, quiet = TRUE)" diff --git a/dockerfiles/rdpesr/integrate_annotations.R b/dockerfiles/rdpesr/integrate_annotations.R deleted file mode 100644 index 4f50f26b4..000000000 --- a/dockerfiles/rdpesr/integrate_annotations.R +++ /dev/null @@ -1,105 +0,0 @@ -#!/usr/bin/env Rscript -library("optparse") - -option_list = list( - make_option(c( "--gc_anno"), type="character", default=NULL, help="bed file annotated with genomic content", metavar="character"), - make_option(c( "--duphold_il"), type="character", default=NULL, help="vcf file annotated with duphold", metavar="character"), - make_option(c( "--duphold_il_le"), type="character", default=NULL, help="left flank of SVs in vcf annotated with duphold", metavar="character"), - make_option(c( "--duphold_il_ri"), type="character", default=NULL, help="right flank of SVs in vcf file annotated with duphold", metavar="character"), - make_option(c( "--rd"), type="character", default=NULL, help="bed file annotated with bincov rd", metavar="character"), - make_option(c( "--rd_le"), type="character", default=NULL, help="left flank of SVs in bed file annotated with bincov rd", metavar="character"), - make_option(c( "--rd_ri"), type="character", default=NULL, help="right flank of SVs in bed file annotated with bincov rd", metavar="character"), - make_option(c( "--pesr"), type="character", default=NULL, help="SVs in bed file annotated with pe sr counts", metavar="character"), - make_option(c( "--info"), type="character", default=NULL, help="SVID with annotations such as SVTYPE SVLEN ALGORITHMS EVIDENCE FILTER", metavar="character"), - make_option(c( "--gt"), type="character", default=NULL, help="SVID with annotations such as GT and GQ", metavar="character"), - make_option(c( "--raw_manta"), type="character", default=NULL, help="comparison results of SV vs. raw manta SVs", metavar="character"), - make_option(c( "--raw_wham"), type="character", default=NULL, help="comparison results of SV vs. raw wham SVs", metavar="character"), - make_option(c( "--raw_melt"), type="character", default=NULL, help="comparison results of SV vs. 
raw melt SVs", metavar="character"), - make_option(c( "--denovo"), type="character", default=NULL, help="two column file with SVID and de novo rate", metavar="character"), - make_option(c( "--output"), type="character", default=NULL, help="output file", metavar="character") - ); - -opt_parser = OptionParser(option_list=option_list); -opt = parse_args(opt_parser); - -extract_duphold_info<-function(duphold_name){ - duphold=read.table(duphold_name) - out=duphold[,c(3,9,10)] - for(i in c('DHFC','DHFFC','DHBFC')){ - out[,ncol(out)+1]=apply(out,1,function(x){strsplit(as.character(x[3]),':')[[1]][match(i,strsplit(as.character(x[2]),':')[[1]])]}) - colnames(out)[ncol(out)]=i - } - colnames(out)[1]='SVID' - return(out[,c(1,4:ncol(out))]) -} - -dat=read.table(opt$gc_anno,sep='\t') -dat=dat[dat[,6]!="",] -colnames(dat)=c('#CHR','POS','END','SVID','SVTYPE','sample','svtype','length','GC') - -duphold = extract_duphold_info(opt$duphold_il) -colnames(duphold)=c('SVID','DHFC_IL','DHFFC_IL','DHBFC_IL') -dat=merge(dat, duphold, by='SVID') - -duphold_le=extract_duphold_info(opt$duphold_il_le) -colnames(duphold_le)=c('SVID','DHFC_IL_le','DHFFC_IL_le','DHBFC_IL_le') -dat=merge(dat, duphold_le, by='SVID') - -duphold_ri=extract_duphold_info(opt$duphold_il_ri) -colnames(duphold_ri)=c('SVID','DHFC_IL_ri','DHFFC_IL_ri','DHBFC_IL_ri') -dat=merge(dat, duphold_ri, by='SVID') - -rd=read.table(opt$rd) -colnames(rd)[c(4,ncol(rd)-2,ncol(rd)-1,ncol(rd))]=c('SVID','rd_median','rd_mean','rd_std') -dat=merge(dat, rd[,c(4,ncol(rd)-2,ncol(rd)-1,ncol(rd))], by='SVID') - -rd_le=read.table(opt$rd_le) -colnames(rd_le)[c(4,ncol(rd_le)-2,ncol(rd_le)-1,ncol(rd_le))]=c('SVID','rd_median_le','rd_mean_le','rd_std_le') -dat=merge(dat, rd_le[,c(4,ncol(rd_le)-2,ncol(rd_le)-1,ncol(rd_le))], by='SVID') - -rd_ri=read.table(opt$rd_ri) -colnames(rd_ri)[c(4,ncol(rd_ri)-2,ncol(rd_ri)-1,ncol(rd_ri))]=c('SVID','rd_median_ri','rd_mean_ri','rd_std_ri') -dat=merge(dat, rd_ri[,c(4,ncol(rd_ri)-2,ncol(rd_ri)-1,ncol(rd_ri))], by='SVID') - -pesr=read.table(opt$pesr) -colnames(pesr)[c(4,ncol(pesr)-5,ncol(pesr)-4,ncol(pesr)-3,ncol(pesr)-2,ncol(pesr)-1,ncol(pesr))]=c('SVID','PE_le','PE_ri','SR_le','SR_ri','SR_le_V2','SR_ri_V2') -dat=merge(dat, pesr[,c(4,ncol(pesr)-5,ncol(pesr)-4,ncol(pesr)-3,ncol(pesr)-2,ncol(pesr)-1,ncol(pesr))], by='SVID') - -dat[,ncol(dat)+1]=apply(dat[,c('PE_le','PE_ri')],1,max) -colnames(dat)[ncol(dat)]='PE_max' -dat[,ncol(dat)+1]=apply(dat[,c('PE_le','PE_ri')],1,min) -colnames(dat)[ncol(dat)]='PE_min' -dat[,ncol(dat)+1]=apply(dat[,c('SR_le','SR_ri')],1,max) -colnames(dat)[ncol(dat)]='SR_max' -dat[,ncol(dat)+1]=apply(dat[,c('SR_le','SR_ri')],1,min) -colnames(dat)[ncol(dat)]='SR_min' - -info=read.table(opt$info,sep='\t', header =T) -colnames(info)[1]='SVID' -info[,ncol(info)+1] = apply(info,1,function(x){grepl('BOTHSIDES_SUPPORT',as.character(x[ncol(info)]))}) -colnames(info)[ncol(info)]='BothSideSupp' -dat=merge(dat, info[,c('SVID','ALGORITHMS','EVIDENCE','FILTER','BothSideSupp')], by='SVID') - -gtgq=read.table(opt$gt, header =T) -dat=merge(dat, gtgq, by='SVID') - -dnv = read.table(opt$denovo, header =T) -dat=merge(dat, dnv, by='SVID') - -dat[,ncol(dat)+1] = 's4_over5Kb' -colnames(dat)[ncol(dat)]='size_cate' -dat[dat$length<5000,][,ncol(dat)]='s3_1to5Kb' -dat[dat$length<1000,][,ncol(dat)]='s2_250bpto1Kb' -dat[dat$length<250,][,ncol(dat)]='s1_under250bp' - -vs_manta=read.table(opt$raw_manta, comment.char="", header=T, sep='\t') -colnames(vs_manta)[c(4,8,9)]=c('SVID','vs_raw_manta_ovr1a','vs_raw_manta_ovr1b') -dat=merge(dat, 
vs_manta[,c(4,8,9)], by='SVID') -vs_wham=read.table(opt$raw_wham, comment.char="", header=T, sep='\t') -colnames(vs_wham)[c(4,8,9)]=c('SVID','vs_raw_wham_ovr1a','vs_raw_wham_ovr1b') -dat=merge(dat, vs_wham[,c(4,8,9)], by='SVID') -vs_melt=read.table(opt$raw_melt, comment.char="", header=T, sep='\t') -colnames(vs_melt)[c(4,8,9)]=c('SVID','vs_raw_melt_ovr1a','vs_raw_melt_ovr1b') -dat=merge(dat, vs_melt[,c(4,8,9)], by='SVID') - -write.table(dat[,c(2:4,1,5:ncol(dat))],opt$output, quote =F, sep='\t', col.names=T, row.names=F) diff --git a/dockerfiles/rdpesr/integrate_annotations_wo_duphold.R b/dockerfiles/rdpesr/integrate_annotations_wo_duphold.R deleted file mode 100644 index f53cb63e6..000000000 --- a/dockerfiles/rdpesr/integrate_annotations_wo_duphold.R +++ /dev/null @@ -1,80 +0,0 @@ -#!/usr/bin/env Rscript -library("optparse") - -option_list = list( - make_option(c( "--anno"), type="character", default=NULL, help="bed file annotated with all annotations", metavar="character"), - make_option(c( "--rd"), type="character", default=NULL, help="bed file annotated with bincov rd", metavar="character"), - make_option(c( "--rd_le"), type="character", default=NULL, help="left flank of SVs in bed file annotated with bincov rd", metavar="character"), - make_option(c( "--rd_ri"), type="character", default=NULL, help="right flank of SVs in bed file annotated with bincov rd", metavar="character"), - make_option(c( "--pesr"), type="character", default=NULL, help="SVs in bed file annotated with pe sr counts", metavar="character"), - #make_option(c( "--info"), type="character", default=NULL, help="SVID with annotations such as SVTYPE SVLEN ALGORITHMS EVIDENCE FILTER", metavar="character"), - make_option(c( "--gt"), type="character", default=NULL, help="SVID with annotations such as GT and GQ", metavar="character"), - make_option(c( "--raw_manta"), type="character", default=NULL, help="comparison results of SV vs. raw manta SVs", metavar="character"), - make_option(c( "--raw_wham"), type="character", default=NULL, help="comparison results of SV vs. raw wham SVs", metavar="character"), - make_option(c( "--raw_melt"), type="character", default=NULL, help="comparison results of SV vs. 
raw melt SVs", metavar="character"), - make_option(c( "--denovo"), type="character", default=NULL, help="two column file with SVID and de novo rate", metavar="character"), - make_option(c( "--output"), type="character", default=NULL, help="output file", metavar="character") - ); - -opt_parser = OptionParser(option_list=option_list); -opt = parse_args(opt_parser); - -extract_duphold_info<-function(duphold_name){ - duphold=read.table(duphold_name) - out=duphold[,c(3,9,10)] - for(i in c('DHFC','DHFFC','DHBFC')){ - out[,ncol(out)+1]=apply(out,1,function(x){strsplit(as.character(x[3]),':')[[1]][match(i,strsplit(as.character(x[2]),':')[[1]])]}) - colnames(out)[ncol(out)]=i - } - colnames(out)[1]='SVID' - return(out[,c(1,4:ncol(out))]) -} - -anno=read.table(opt$anno, header=T) - -rd=read.table(opt$rd) -colnames(rd)[c(1:4,ncol(rd)-2,ncol(rd)-1,ncol(rd))]=c('#chr','pos','end','SVID','rd_median','rd_mean','rd_std') -dat=merge(rd[,c(1:4,ncol(rd)-2,ncol(rd)-1,ncol(rd))],anno, by='SVID') - -rd_le=read.table(opt$rd_le) -colnames(rd_le)[c(4,ncol(rd_le)-2,ncol(rd_le)-1,ncol(rd_le))]=c('SVID','rd_median_le','rd_mean_le','rd_std_le') -dat=merge(dat, rd_le[,c(4,ncol(rd_le)-2,ncol(rd_le)-1,ncol(rd_le))], by='SVID') - -rd_ri=read.table(opt$rd_ri) -colnames(rd_ri)[c(4,ncol(rd_ri)-2,ncol(rd_ri)-1,ncol(rd_ri))]=c('SVID','rd_median_ri','rd_mean_ri','rd_std_ri') -dat=merge(dat, rd_ri[,c(4,ncol(rd_ri)-2,ncol(rd_ri)-1,ncol(rd_ri))], by='SVID') - -pesr=read.table(opt$pesr) -colnames(pesr)[c(4,ncol(pesr)-5,ncol(pesr)-4,ncol(pesr)-3,ncol(pesr)-2,ncol(pesr)-1,ncol(pesr))]=c('SVID','PE_le','PE_ri','SR_le','SR_ri','SR_le_V2','SR_ri_V2') -dat=merge(dat, pesr[,c(4,ncol(pesr)-5,ncol(pesr)-4,ncol(pesr)-3,ncol(pesr)-2,ncol(pesr)-1,ncol(pesr))], by='SVID') - -dat[,ncol(dat)+1]=apply(dat[,c('PE_le','PE_ri')],1,max) -colnames(dat)[ncol(dat)]='PE_max' -dat[,ncol(dat)+1]=apply(dat[,c('PE_le','PE_ri')],1,min) -colnames(dat)[ncol(dat)]='PE_min' -dat[,ncol(dat)+1]=apply(dat[,c('SR_le','SR_ri')],1,max) -colnames(dat)[ncol(dat)]='SR_max' -dat[,ncol(dat)+1]=apply(dat[,c('SR_le','SR_ri')],1,min) -colnames(dat)[ncol(dat)]='SR_min' - - -gtgq=read.table(opt$gt, header =T) -dat=merge(dat, gtgq, by='SVID') - -dnv = read.table(opt$denovo, header =T) -dnv[,7]=rowSums(dnv[,c(2:5)]) -colnames(dnv)[7]='inheri_trios' -dat=merge(dat, dnv[,c('SVID','denovo_rate','inheri_trios')], by='SVID') - - -vs_manta=read.table(opt$raw_manta, comment.char="", header=T, sep='\t') -colnames(vs_manta)[c(4,8,9)]=c('SVID','vs_raw_manta_ovr1a','vs_raw_manta_ovr1b') -dat=merge(dat, vs_manta[,c(4,8,9)], by='SVID') -vs_wham=read.table(opt$raw_wham, comment.char="", header=T, sep='\t') -colnames(vs_wham)[c(4,8,9)]=c('SVID','vs_raw_wham_ovr1a','vs_raw_wham_ovr1b') -dat=merge(dat, vs_wham[,c(4,8,9)], by='SVID') -vs_melt=read.table(opt$raw_melt, comment.char="", header=T, sep='\t') -colnames(vs_melt)[c(4,8,9)]=c('SVID','vs_raw_melt_ovr1a','vs_raw_melt_ovr1b') -dat=merge(dat, vs_melt[,c(4,8,9)], by='SVID') - -write.table(dat[,c(2:4,1,5:ncol(dat))],opt$output, quote =F, sep='\t', col.names=T, row.names=F) diff --git a/dockerfiles/rdpesr/modify_bed_for_PE_SR_RD_labeling.R b/dockerfiles/rdpesr/modify_bed_for_PE_SR_RD_labeling.R deleted file mode 100755 index fa3870cfe..000000000 --- a/dockerfiles/rdpesr/modify_bed_for_PE_SR_RD_labeling.R +++ /dev/null @@ -1,35 +0,0 @@ -#!/usr/bin/env Rscript -library("optparse") - -option_list = list( - make_option(c("-i", "--input"), type="character", default=NULL, - help="name of input bed file", metavar="character"), - make_option(c("--le_bp"), 
type="character", default=NULL, - help="name of output left bp file", metavar="character"), - make_option(c("--ri_bp"), type="character", default=NULL, - help="name of output right bp file", metavar="character"), - make_option(c("--le_flank"), type="character", default=NULL, - help="name of output left flank file", metavar="character"), - make_option(c("--ri_flank"), type="character", default=NULL, - help="name of output right flank file", metavar="character") - - ); - -opt_parser = OptionParser(option_list=option_list); -opt = parse_args(opt_parser); - -filename=opt$input - -flank_length=1000 -dat=read.table(filename,sep='\t') -dat2=dat[,c(1,2,2,4)] -dat3=dat[,c(1,3,3,4)] -dat4=dat2 -dat4[,2]=dat4[,2]-flank_length -dat5=dat3 -dat5[,3]=dat5[,3]+flank_length -write.table(dat2, opt$le_bp, quote=F, sep='\t', col.names=F, row.names=F) -write.table(dat3, opt$ri_bp, quote=F, sep='\t', col.names=F, row.names=F) -write.table(dat4, opt$le_flank, quote=F, sep='\t', col.names=F, row.names=F) -write.table(dat5, opt$ri_flank, quote=F, sep='\t', col.names=F, row.names=F) - diff --git a/dockerfiles/rdpesr/modify_pe_metrics.R b/dockerfiles/rdpesr/modify_pe_metrics.R deleted file mode 100755 index 0e087bd25..000000000 --- a/dockerfiles/rdpesr/modify_pe_metrics.R +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/env Rscript -library("optparse") - -option_list = list( - make_option(c("-i", "--input"), type="character", default=NULL, - help="name of input bincov tsv file", metavar="character") - ); - -opt_parser = OptionParser(option_list=option_list); -opt = parse_args(opt_parser); - -filein=opt$input -dat=read.table(filein) -write.table(dat[,c(1,2,5,4,3,6,7)], gsub('.txt.gz','.V2.txt',filein), quote=F, sep='\t', col.names=F, row.names=F) - - - diff --git a/inputs/templates/test/ApplyManualVariantFilter/ApplyManualVariantFilter.json.tmpl b/inputs/templates/test/ApplyManualVariantFilter/ApplyManualVariantFilter.json.tmpl deleted file mode 100644 index 7084918ca..000000000 --- a/inputs/templates/test/ApplyManualVariantFilter/ApplyManualVariantFilter.json.tmpl +++ /dev/null @@ -1,7 +0,0 @@ -{ - "ApplyManualVariantFilter.vcf" : {{ test_batch.clean_vcf | tojson }}, - "ApplyManualVariantFilter.prefix" : {{ test_batch.name | tojson }}, - "ApplyManualVariantFilter.sv_base_mini_docker":{{ dockers.sv_base_mini_docker | tojson }}, - "ApplyManualVariantFilter.bcftools_filter": "(SVTYPE==\"DEL\" && COUNT(ALGORITHMS)==1 && ALGORITHMS==\"wham\") || (ALT==\"\" && COUNT(ALGORITHMS)==1 && ALGORITHMS==\"scramble\" && HIGH_SR_BACKGROUND==1)", - "ApplyManualVariantFilter.filter_name": "high_algorithm_fp_rate" -} diff --git a/inputs/templates/test/Module09/Module09VisualizeSingleSample.json.tmpl b/inputs/templates/test/Module09/Module09VisualizeSingleSample.json.tmpl deleted file mode 100644 index 9c90e8b06..000000000 --- a/inputs/templates/test/Module09/Module09VisualizeSingleSample.json.tmpl +++ /dev/null @@ -1,40 +0,0 @@ -{ - "Module09VisualizeSingleSample.sv_base_mini_docker" : "us.gcr.io/broad-dsde-methods/gatk-sv/sv-base-mini:mw-gnomad-02-6a66c96", - "Module09VisualizeSingleSample.sv_pipeline_docker" : "us.gcr.io/broad-dsde-methods/gatk-sv/sv-pipeline:2024-04-29-v0.28.4-beta-7fd43657", - "Module09VisualizeSingleSample.igv_docker" : "us.gcr.io/broad-dsde-methods/gatk-sv/igv:mw-xz-fixes-2-b1be6a9", - - "Module09VisualizeSingleSample.Fasta" : "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta", - "Module09VisualizeSingleSample.Fasta_idx" : 
"gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai", - "Module09VisualizeSingleSample.Fasta_dict" : "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict", - - "Module09VisualizeSingleSample.varfile" : "gs://talkowski-sv-gnomad-output/1KGP/visualize/gatksv_uniq.igv_test.HG00513.bed", - "Module09VisualizeSingleSample.pedfile" : "gs://talkowski-sv-gnomad-output/1KGP/ped/1KGP_2504.ped", - "Module09VisualizeSingleSample.flags" : "", - "Module09VisualizeSingleSample.prefix" : "1KGP_2504.CCDG_over5Kb.all_samples", - "Module09VisualizeSingleSample.batch_bincov" : "gs://talkowski-sv-gnomad-output/1KGP/ped/1KGP_2504_and_698.batch_bincov", - "Module09VisualizeSingleSample.sample_batches" : "gs://talkowski-sv-gnomad-output/1KGP/ped/1KGP_2504_and_698.sample_batch", - - "Module09VisualizeSingleSample.medianfile" : [ - "gs://talkowski-sv-gnomad-output/1KGP/Module00c/0ceb2ba0-914b-46bd-93c2-765937499ac7/call-MedianCov/MedianCov/f218e286-4eef-4cfe-b553-1af0c7657745/call-CalcMedCov/batch_1_1_medianCov.transposed.bed", - "gs://talkowski-sv-gnomad-output/1KGP/Module00c/626aef18-83bc-43a5-8945-455965d74f09/call-MedianCov/MedianCov/bb0d7b43-e0a8-41cf-bdd3-f6e88fdd7c52/call-CalcMedCov/batch_1_2_medianCov.transposed.bed", - "gs://talkowski-sv-gnomad-output/1KGP/Module00c/3d3ffcb1-9e8b-4f62-be42-80eeb3e15c95/call-MedianCov/MedianCov/35721188-6ea5-4974-87fc-682518a24905/call-CalcMedCov/batch_1_3_medianCov.transposed.bed", - "gs://talkowski-sv-gnomad-output/1KGP/Module00c/6dc40530-5262-42b8-b5bf-6ffc1a0da425/call-MedianCov/MedianCov/3660ee77-47db-49cb-86ad-5418f1a53b5e/call-CalcMedCov/batch_1_4_medianCov.transposed.bed", - "gs://talkowski-sv-gnomad-output/1KGP/Module00c/24b6efef-33c1-493e-b473-8f6a065d8ef7/call-MedianCov/MedianCov/e120a6f7-ac04-4e9e-a9f0-dcba64a91efb/call-CalcMedCov/batch_2_1_medianCov.transposed.bed", - "gs://talkowski-sv-gnomad-output/1KGP/Module00c/0b89f2f7-dd3f-4998-8218-f923577ab5f4/call-MedianCov/MedianCov/3bf9c0e8-6e5d-465e-9e8c-eb9cadc70555/call-CalcMedCov/batch_2_2_medianCov.transposed.bed", - "gs://talkowski-sv-gnomad-output/1KGP/Module00c/183259d7-4a8a-4e7a-ad69-8cb0ee0f6aa1/call-MedianCov/MedianCov/044d1f33-9e57-4766-bd7c-9cfd8f1daa50/call-CalcMedCov/batch_2_3_medianCov.transposed.bed", - "gs://talkowski-sv-gnomad-output/1KGP/Module00c/75189c12-3517-4e7c-85b2-6a6433214bdd/call-MedianCov/MedianCov/fb418ece-c96c-4f84-a983-73352a4c8895/call-CalcMedCov/batch_2_4_medianCov.transposed.bed", - "gs://talkowski-sv-gnomad-output/1KGP/Module00c/31d76ba6-1e74-4aa1-9bfa-335dd4075d1a/call-MedianCov/MedianCov/c4116338-4902-4bf6-ac3c-531039730c5b/call-CalcMedCov/batch_3_1_medianCov.transposed.bed", - "gs://talkowski-sv-gnomad-output/1KGP/Module00c/2782a859-c624-4f1e-ab62-5f52593f500d/call-MedianCov/MedianCov/3d333fc7-7f45-497a-87d7-ea29dd00ac8a/call-CalcMedCov/batch_3_2_medianCov.transposed.bed", - "gs://talkowski-sv-gnomad-output/1KGP/Module00c/f488f647-84ba-4466-af90-ecc9ef74a7e1/call-MedianCov/MedianCov/7fc8b8d7-64a8-4bdb-bb0d-ff385c89fe7f/call-CalcMedCov/batch_3_3_medianCov.transposed.bed", - "gs://talkowski-sv-gnomad-output/1KGP/Module00c/11504021-b191-48b6-a4de-6620233f5b51/call-MedianCov/MedianCov/53f910fd-1773-4e2d-af9e-959f9e52cb2b/call-CalcMedCov/batch_3_4_medianCov.transposed.bed", - "gs://talkowski-sv-gnomad-output/1KGP/Module00c/c84be4dc-1493-4f01-a82f-73f6aaf5129a/call-MedianCov/MedianCov/49e6f7c3-26d9-411f-b1ee-f76c4970e240/call-CalcMedCov/batch_4_1_medianCov.transposed.bed", - 
"gs://talkowski-sv-gnomad-output/1KGP/Module00c/3961ee21-8ccc-4f99-8aaf-d65e73be657c/call-MedianCov/MedianCov/faff7d4a-89cc-4fad-b8d7-13764c31ed51/call-CalcMedCov/batch_4_2_medianCov.transposed.bed", - "gs://talkowski-sv-gnomad-output/1KGP/Module00c/01479285-3b93-4a2e-ba49-f8e7d6583cc8/call-MedianCov/MedianCov/e7561e49-da49-4915-b7e6-c42020d7c4a4/call-CalcMedCov/batch_4_3_medianCov.transposed.bed", - "gs://talkowski-sv-gnomad-output/1KGP/Module00c/736aad8c-30e5-4605-bc7d-7cfca0b204cb/call-MedianCov/MedianCov/90f909f4-4969-4145-8326-2d3ce1943ae9/call-CalcMedCov/batch_4_4_medianCov.transposed.bed"], - "Module09VisualizeSingleSample.sample_list" : ["HG00513"], - "Module09VisualizeSingleSample.cram_list" : ["gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG00513/analysis/HG00513.final.cram"], - "Module09VisualizeSingleSample.crai_list" : ["gs://fc-56ac46ea-efc4-4683-b6d5-6d95bed41c5e/CCDG_13607/Project_CCDG_13607_B01_GRM_WGS.cram.2019-02-06/Sample_HG00513/analysis/HG00513.final.cram.crai"] -} - - - diff --git a/inputs/templates/test/Mosaic/Mosaic.json.tmpl b/inputs/templates/test/Mosaic/Mosaic.json.tmpl deleted file mode 100644 index 3126e9d94..000000000 --- a/inputs/templates/test/Mosaic/Mosaic.json.tmpl +++ /dev/null @@ -1,16 +0,0 @@ -{ - "MosaicManualCheck.rare_cutoff":2, - "MosaicManualCheck.outlier": "gs://gatk-sv-resources/resources/outlier.txt", - "MosaicManualCheck.fam_file": {{ test_batch.ped_file | tojson }}, - "MosaicManualCheck.sv_pipeline_docker": {{ dockers.sv_pipeline_docker | tojson }}, - "MosaicManualCheck.sv_base_mini_docker": {{ dockers.sv_base_mini_docker | tojson }}, - "MosaicManualCheck.agg_metrics": [{{ test_batch.evidence_metrics | tojson }}], - "MosaicManualCheck.per_batch_clustered_pesr_vcf_list": ["gs://gatk-sv-resources/test/mosaic/pesr_list.txt"], - "MosaicManualCheck.clustered_depth_vcfs": [{{ test_batch.merged_depth_vcf | tojson }}], - "MosaicManualCheck.prefix": {{ test_batch.name | tojson }}, - "MosaicManualCheck.RF_cutoffs": [{{ test_batch.cutoffs | tojson }}], - "MosaicManualCheck.coverage_file_idxs": [{{ test_batch.merged_coverage_file_index| tojson }}], - "MosaicManualCheck.coverage_files": [{{ test_batch.merged_coverage_file | tojson }}], - "MosaicManualCheck.median_files": [{{ test_batch.medianfile | tojson }}] -} - diff --git a/inputs/templates/test/RenameVcfSamples/RenameVcfSamples.json.tmpl b/inputs/templates/test/RenameVcfSamples/RenameVcfSamples.json.tmpl deleted file mode 100644 index 1cc40d83b..000000000 --- a/inputs/templates/test/RenameVcfSamples/RenameVcfSamples.json.tmpl +++ /dev/null @@ -1,8 +0,0 @@ -{ - "RenameVcfSamples.vcf": {{ test_batch.clean_vcf | tojson }}, - "RenameVcfSamples.prefix": {{ test_batch.name | tojson }}, - "RenameVcfSamples.current_sample_ids": {{ test_batch.samples | tojson }}, - "RenameVcfSamples.new_sample_ids": {{ test_batch.renamed_ids | tojson }}, - "RenameVcfSamples.check_rename_all_samples": true, - "RenameVcfSamples.sv_pipeline_docker": {{ dockers.sv_pipeline_docker | tojson }} -} \ No newline at end of file diff --git a/inputs/templates/test/SetSampleIdLegacy/SetSampleIdLegacy.json.tmpl b/inputs/templates/test/SetSampleIdLegacy/SetSampleIdLegacy.json.tmpl deleted file mode 100644 index 7c774bfda..000000000 --- a/inputs/templates/test/SetSampleIdLegacy/SetSampleIdLegacy.json.tmpl +++ /dev/null @@ -1,8 +0,0 @@ -{ - "SetSampleIdLegacy.sample_name": "xyzzy", - "SetSampleIdLegacy.PE_file": {{ test_batch.PE_example_file | tojson }}, - 
"SetSampleIdLegacy.SR_file": {{ test_batch.SR_example_file | tojson }}, - "SetSampleIdLegacy.SD_file": {{ test_batch.SD_example_file | tojson }}, - "SetSampleIdLegacy.reference_dict": {{ reference_resources.reference_dict | tojson }}, - "SetSampleIdLegacy.gatk_docker": {{ dockers.gatk_docker | tojson }} -} diff --git a/inputs/templates/test/SubsetVcfBySamples/SubsetVcfBySamples.json.tmpl b/inputs/templates/test/SubsetVcfBySamples/SubsetVcfBySamples.json.tmpl deleted file mode 100644 index ec59d40e7..000000000 --- a/inputs/templates/test/SubsetVcfBySamples/SubsetVcfBySamples.json.tmpl +++ /dev/null @@ -1,7 +0,0 @@ -{ - "SubsetVcfBySamples.vcf" : {{ test_batch.clean_vcf | tojson }}, - - "SubsetVcfBySamples.sv_base_mini_docker":{{ dockers.sv_base_mini_docker | tojson }}, - - "SubsetVcfBySamples.list_of_samples": "gs://gatk-sv-ref-panel-1kg/test_inputs/ref_panel_1kg_subset_10_samples.txt" -} diff --git a/inputs/values/dockers.json b/inputs/values/dockers.json index cd8eb2d27..b4e41f574 100644 --- a/inputs/values/dockers.json +++ b/inputs/values/dockers.json @@ -15,11 +15,8 @@ "sv_pipeline_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/sv-pipeline:2025-01-14-v1.0.1-88dbd052", "sv_pipeline_qc_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/sv-pipeline:2025-01-14-v1.0.1-88dbd052", "wham_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/wham:2024-10-25-v0.29-beta-5ea22a52", - "igv_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/igv:mw-xz-fixes-2-b1be6a9", - "duphold_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/duphold:mw-xz-fixes-2-b1be6a9", "vapor_docker": "us.gcr.io/broad-dsde-methods/eph/vapor:header-hash-2fc8f12", "cloud_sdk_docker": "google/cloud-sdk", - "pangenie_docker": "us.gcr.io/broad-dsde-methods/vjalili/pangenie:vj-127571f", "sv-base-virtual-env": "us.gcr.io/broad-dsde-methods/vjalili/sv-base-virtual-env:5994670", "cnmops-virtual-env": "us.gcr.io/broad-dsde-methods/vjalili/cnmops-virtual-env:5994670", "sv-pipeline-virtual-env": "us.gcr.io/broad-dsde-methods/gatk-sv/sv-pipeline-virtual-env:2024-10-25-v0.29-beta-5ea22a52", diff --git a/scripts/cromwell/analyze_monitoring_logs.py b/scripts/cromwell/analyze_monitoring_logs.py deleted file mode 100644 index dc0f34081..000000000 --- a/scripts/cromwell/analyze_monitoring_logs.py +++ /dev/null @@ -1,320 +0,0 @@ -#!/bin/python - -import pandas as pd -import glob -import argparse -import numpy as np -from datetime import datetime -import matplotlib.pyplot as plt - -# Synopsis: -# Generates summary statistics on Cromwell monitoring logs collected using download_monitoring_logs.py. -# Cost estimates assume all machines are preemptible and have a fixed bootup time. Resource -# usage and costs are for requesting optimal resource (equal to the max observed) uniformly across all shards ("static") -# and individually for each shard ("dynamic"). 
-# -# Usage: -# python analyze_monitoring_logs.py /path/to/logs /path/to/output_base -# -# Parameters: -# /path/to/logs : Path containing monitoring script logs ending in ".monitoring.log" -# /path/to/output_base : Base output path, to which extensions will be appended for each output file -# -# Author: Mark Walker (markw@broadinstitute.org) - -TIME_FORMAT = "%a %b %d %H:%M:%S %Z %Y" -ALL_HEADER = '#job\ttask\thr\tmem_total\tmem_gb_max\tmem_pct_max\tdisk_total\tdisk_gb_max\tdisk_pct_max\tmem_gb_hr\tdisk_gb_hr\tmax_mem_gb_hr\tmax_disk_gb_hr\tcost_mem\tcost_mem_dyn\tcost_disk\tcost_disk_dyn\n' -GROUP_HEADER = '#task\thr\tmem_avg\tmem_gb_max\tmem_pct_max\tdisk_avg\tdisk_gb_max\tdisk_pct_max\tmem_gb_hr\tdisk_gb_hr\tmax_mem_gb_hr\tmax_disk_gb_hr\tcost_mem\tcost_mem_static\tcost_mem_dyn\tcost_disk\tcost_disk_static\tcost_disk_dyn\n' - -COST_PER_GB_MEM_HR = 0.000892 -COST_CPU_HR = 0.006655 -COST_PER_GB_DISK_HR = 0.00005555555 - -MIN_CPU = 1 -MIN_MEM_GB = 0.9 -MIN_DISK_GB = 1 - -BOOT_DISK_GB = 10 -DEFAULT_OVERHEAD_MIN = 5. - - -def write_data(data, file_path, header): - with open(file_path, 'w') as f: - f.write(header) - for key in data.index: - f.write(key + '\t' + '\t'.join([str(x) - for x in data.loc(key)]) + '\n') - - -def read_data(dir, overhead_min=0): - data = {} - for filepath in glob.glob(dir + '/*.monitoring.log'): - with open(filepath, 'r') as f: - mem_gb_data_f = [] - disk_gb_data_f = [] - mem_pct_data_f = [] - disk_pct_data_f = [] - cpu_pct_data_f = [] - total_mem = 0 - total_disk = 0 - total_cpu = 0 - start_time = None - end_time = None - for line in f: - tokens = line.strip().split(' ') - if start_time is None and line.startswith('['): - start_time = datetime.strptime( - line.strip()[1:-1], TIME_FORMAT) - if line.startswith('['): - end_time = datetime.strptime( - line.strip()[1:-1], TIME_FORMAT) - if line.startswith('Total Memory:'): - total_mem = float(tokens[2]) - elif line.startswith('#CPU:'): - total_cpu = float(tokens[1]) - elif line.startswith('Total Disk space:'): - total_disk = float(tokens[3]) - elif line.startswith('* Memory usage:'): - mem_gb = float(tokens[3]) - mem_pct = float(tokens[5][:-1]) / 100.0 - mem_gb_data_f.append(mem_gb) - mem_pct_data_f.append(mem_pct) - elif line.startswith('* Disk usage:'): - disk_gb = float(tokens[3]) - disk_pct = float(tokens[5][:-1]) / 100.0 - disk_gb_data_f.append(disk_gb) - disk_pct_data_f.append(disk_pct) - elif line.startswith('* CPU usage:'): - if len(tokens) == 4: - cpu_pct = float(tokens[3].replace("%", "")) / 100.0 - else: - cpu_pct = 1 - cpu_pct_data_f.append(cpu_pct) - if len(mem_gb_data_f) > 0 and len(disk_gb_data_f) > 0: - filename = filepath.split('/')[-1] - entry = filename.replace(".monitoring.log", "") - task = entry.split('.')[0] - - max_mem_gb = max(mem_gb_data_f) - max_mem_pct = max(mem_pct_data_f) - max_disk_gb = max(disk_gb_data_f) - max_disk_pct = max(disk_pct_data_f) - max_cpu_pct = max(cpu_pct_data_f) - max_cpu = max_cpu_pct * total_cpu - - delta_time = end_time - start_time - delta_hour = (delta_time.total_seconds() / - 3600.) 
+ (overhead_min / 60.0) - cpu_hour = total_cpu * delta_hour - mem_hour = total_mem * delta_hour - disk_hour = total_disk * delta_hour - max_cpu_hour = max_cpu_pct * total_cpu * delta_hour - max_mem_hour = max_mem_gb * delta_hour - max_disk_hour = max_disk_gb * delta_hour - - cost_mem = COST_PER_GB_MEM_HR * mem_hour - cost_mem_opt = COST_PER_GB_MEM_HR * \ - max(max_mem_gb, MIN_MEM_GB) * delta_hour - - cost_disk = COST_PER_GB_DISK_HR * \ - (total_disk + BOOT_DISK_GB) * delta_hour - cost_disk_opt = COST_PER_GB_DISK_HR * \ - (max(max_disk_gb, MIN_DISK_GB) + BOOT_DISK_GB) * delta_hour - - cost_cpu = COST_CPU_HR * total_cpu * delta_hour - cost_cpu_opt = COST_CPU_HR * \ - max(max_cpu, MIN_MEM_GB) * delta_hour - - data[entry] = { - "task": task, - "delta_hour": delta_hour, - "total_cpu": total_cpu, - "total_mem": total_mem, - "total_disk": total_disk, - "max_cpu": max_cpu, - "max_cpu_pct": max_cpu_pct, - "max_mem_gb": max_mem_gb, - "max_mem_pct": max_mem_pct, - "max_disk_gb": max_disk_gb, - "max_disk_pct": max_disk_pct, - "cpu_hour": cpu_hour, - "mem_hour": mem_hour, - "disk_hour": disk_hour, - "max_cpu_hour": max_cpu_hour, - "max_mem_hour": max_mem_hour, - "max_disk_hour": max_disk_hour, - "cost_cpu": cost_cpu, - "cost_cpu_opt": cost_cpu_opt, - "cost_mem": cost_mem, - "cost_mem_opt": cost_mem_opt, - "cost_disk": cost_disk, - "cost_disk_opt": cost_disk_opt - } - return data - - -def get_data_field(name, data): - return [x[name] for x in data] - - -def calc_group(data): - task_names = data.task.unique() - group_data = {} - for task in task_names: - d = data.loc[data['task'] == task] - hours = np.sum(d["delta_hour"]) - avg_cpu = np.mean(d["total_cpu"]) - avg_mem = np.mean(d["total_mem"]) - max_mem = np.max(d["max_mem_gb"]) - max_cpu = np.max(d["max_cpu"]) - max_cpu_pct = np.max(d["max_cpu_pct"]) - max_mem_pct = np.max(d["max_mem_pct"]) - avg_disk = np.mean(d["total_disk"]) - max_disk = np.max(d["max_disk_gb"]) - max_disk_pct = np.max(d["max_disk_pct"]) - cpu_hour = np.sum(d["cpu_hour"]) - mem_hour = np.sum(d["mem_hour"]) - disk_hour = np.sum(d["disk_hour"]) - max_cpu_hour = np.max(d["max_cpu_hour"]) - max_mem_hour = np.max(d["max_mem_hour"]) - max_disk_hour = np.max(d["max_disk_hour"]) - cost_cpu = np.sum(d["cost_cpu"]) - cost_cpu_dyn = np.sum(d["cost_cpu_opt"]) - cost_mem = np.sum(d["cost_mem"]) - cost_mem_dyn = np.sum(d["cost_mem_opt"]) - cost_disk = np.sum(d["cost_disk"]) - cost_disk_dyn = np.sum(d["cost_disk_opt"]) - - cost_cpu_static = COST_CPU_HR * max(max_cpu, MIN_CPU) * hours - cost_mem_static = COST_PER_GB_MEM_HR * max(max_mem, MIN_MEM_GB) * hours - cost_disk_static = COST_PER_GB_DISK_HR * \ - (max(max_disk, MIN_DISK_GB) + BOOT_DISK_GB) * hours - - group_data[task] = { - "hours": hours, - "avg_cpu": avg_cpu, - "avg_mem": avg_mem, - "avg_disk": avg_disk, - "max_cpu": max_cpu, - "max_cpu_pct": max_cpu_pct, - "max_mem": max_mem, - "max_mem_pct": max_mem_pct, - "max_disk": max_disk, - "max_disk_pct": max_disk_pct, - "cpu_hour": cpu_hour, - "mem_hour": mem_hour, - "disk_hour": disk_hour, - "max_cpu_hour": max_cpu_hour, - "max_mem_hour": max_mem_hour, - "max_disk_hour": max_disk_hour, - "cost_cpu": cost_cpu, - "cost_cpu_static": cost_cpu_static, - "cost_cpu_dyn": cost_cpu_dyn, - "cost_mem": cost_mem, - "cost_mem_static": cost_mem_static, - "cost_mem_dyn": cost_mem_dyn, - "cost_disk": cost_disk, - "cost_disk_static": cost_disk_static, - "cost_disk_dyn": cost_disk_dyn, - "total_cost": cost_cpu + cost_mem + cost_disk, - "total_cost_static": cost_cpu_static + cost_mem_static + cost_disk_static, - 
"total_cost_dyn": cost_cpu_dyn + cost_mem_dyn + cost_disk_dyn - } - return group_data - - -def do_simple_bar(data, xticks, path, bar_width=0.35, height=12, width=12, - xtitle='', ytitle='', title='', bottom_adjust=0, legend=[], - yscale='linear', sort_values=None): - num_groups = max([d.shape[0] for d in data]) - if sort_values is not None: - sort_indexes = np.flip(np.argsort(sort_values)) - else: - sort_indexes = np.arange(num_groups) - plt.figure(num=None, figsize=(width, height), - dpi=100, facecolor='w', edgecolor='k') - for i in range(len(data)): - if i < len(legend): - label = legend[i] - else: - label = "data" + str(i) - x = (np.arange(num_groups) * len(data) + i) * bar_width - plt.bar(x, data[i][sort_indexes], label=label) - x = (np.arange(num_groups) * len(data)) * bar_width - plt.xticks(x, [xticks[i] for i in sort_indexes], rotation='vertical') - plt.xlabel(xtitle) - plt.ylabel(ytitle) - plt.title(title) - plt.subplots_adjust(bottom=bottom_adjust) - plt.yscale(yscale) - plt.legend() - plt.savefig(path) - - -def create_graphs(data, out_files_base, semilog=False, num_samples=None): - tasks = data.index - if num_samples is not None: - data = data / num_samples - ytitle = "Cost, $/sample" - title = "Estimated Cost Per Sample" - else: - ytitle = "Cost, $" - title = "Estimated Total Cost" - - if semilog: - yscale = "log" - else: - yscale = "linear" - - do_simple_bar(data=[data["total_cost"], data["total_cost_static"], data["total_cost_dyn"]], - xticks=tasks, - path=out_files_base + ".cost.png", - bar_width=1, - height=8, - width=12, - xtitle="Task", - ytitle=ytitle, - title=title, - bottom_adjust=0.35, - legend=["Current", "Unif", "Pred"], - yscale=yscale, - sort_values=data["total_cost"]) - - -# Main function -def main(): - parser = argparse.ArgumentParser() - parser.add_argument( - "log_dir", help="Path containing monitoring script logs ending in \".monitoring.log\"") - parser.add_argument("output_file", help="Output tsv file base path") - parser.add_argument("--overhead", help="Localization overhead in minutes") - parser.add_argument("--semilog", help="Plot semilog y", - action="store_true") - parser.add_argument( - "--plot-norm", help="Specify number of samples to normalize plots to per sample") - args = parser.parse_args() - - if not args.overhead: - overhead = DEFAULT_OVERHEAD_MIN - else: - overhead = float(args.overhead) - - if args.plot_norm: - plot_norm = int(args.plot_norm) - else: - plot_norm = None - - log_dir = args.log_dir - out_file = args.output_file - data = read_data(log_dir, overhead_min=overhead) - df = pd.DataFrame(data).T - group_data = calc_group(df) - group_df = pd.DataFrame(group_data).T - df.to_csv(path_or_buf=out_file + ".all.tsv", sep="\t") - group_df.to_csv(path_or_buf=out_file + ".grouped.tsv", sep="\t") - create_graphs(group_df, out_file, semilog=args.semilog, - num_samples=plot_norm) - - -if __name__ == "__main__": - main() diff --git a/scripts/cromwell/analyze_monitoring_logs2.py b/scripts/cromwell/analyze_monitoring_logs2.py deleted file mode 100644 index 34daf444b..000000000 --- a/scripts/cromwell/analyze_monitoring_logs2.py +++ /dev/null @@ -1,313 +0,0 @@ -#!/bin/python - -import pandas as pd -import argparse -import numpy as np -import matplotlib.pyplot as plt -from os.path import basename, isfile, getsize -import logging - -# Synopsis: -# Generates summary statistics on Cromwell monitoring log summary table generated using the following command: -# get_cromwell_resource_usage2.sh -u -r workflow_id > table.tsv -# Cost estimates assume all machines 
are preemptible and have a fixed boot time. Resource -# costs are estimated for requesting optimal resources (equal to the max observed) uniformly across all shards -# ("static") and individually for each shard ("dynamic"). -# -# Usage: -# python analyze_monitoring_logs2.py /path/to/log_summary_table /path/to/output_base [optional parameters] -# -# Required parameters: -# /path/to/logs : Path containing monitoring script log summary TSV from get_cromwell_resource_usage2.sh -u -r -# /path/to/output_base : Base output path, to which extensions will be appended for each output file -# Optional parameters: -# --overhead: Localization overhead in minutes -# --semilog: Plot semilog y -# --plot-norm: Specify number of samples to normalize plots to per sample -# --log-level LEVEL (specify level of logging information to print, ie. INFO, WARNING, ERROR - not case-sensitive) -# -# Author: Emma Pierce-Hoffman (epierceh@broadinstitute.org) -# Modified from analyze_monitoring_logs.py by Mark Walker - -COST_PER_GB_MEM_HR = 0.000892 -COST_CPU_HR = 0.006655 -COST_PER_GB_DISK_HR = 0.00005555555 - -MIN_CPU = 1 -MIN_MEM_GB = 0.9 -MIN_DISK_GB = 1 - -BOOT_DISK_GB = 10 -DEFAULT_OVERHEAD_MIN = 5. - - -def check_table_columns(columns): - column_set = set(columns) - required_input_columns = ['ElapsedTime', 'nCPU', 'CPU', 'TotMem', 'Mem', 'MemPct', 'TotDisk', 'Disk', - 'DiskPct', 'task'] - missing_cols = [] - missing = False - for col in required_input_columns: - if col not in column_set: - missing = True - missing_cols.append(col) - if missing: - raise RuntimeError( - "Malformed input table; missing column(s): %s. Use TSV from get_cromwell_resource_usage2.sh -u -r" % ", ".join(missing_cols)) - - -def load_data(log_file, overhead_mins): - # columns in input: - # ['ElapsedTime', 'nCPU', 'CPU', 'TotMem', 'Mem', 'MemPct', 'TotDisk', 'Disk', 'DiskPct', 'IORead', 'IOWrite', 'task'] - data = pd.read_table( - log_file, usecols=lambda x: x not in ('IORead', 'IOWrite')) - check_table_columns(data.columns) - # rename some columns for consistency, clarity - data.rename({'task': 'Task', 'ElapsedTime': 'Hours', 'Mem': 'MaxMem', 'CPU': 'PctCPU', - 'Disk': 'MaxDisk', 'MemPct': 'PctMem', 'DiskPct': 'PctDisk'}, axis='columns', inplace=True) - # add MaxCPU column - data['MaxCPU'] = (data['PctCPU'] / 100) * data['nCPU'] - # reorder so Task column is first, MaxCPU is after nCPU (without assuming input order of columns) - cols = data.columns.tolist() - cols = [col for col in cols if col not in ('Task', 'MaxCPU')] - cpu_ind = cols.index('nCPU') - cols = ['Task'] + cols[:cpu_ind + 1] + ['MaxCPU'] + cols[cpu_ind + 1:] - data = data[cols] - # modify formats - data['Hours'] = pd.to_timedelta(data['Hours']).dt.total_seconds( - ) / 3600.0 # convert ElapsedTime to hours (float) - data['Hours'] += overhead_mins / 60.0 - # keep last (most specific) task name, attempt number, and shard number, if present - data['Task'] = data['Task'].str.replace('/shard', '.shard', regex=False) \ - .str.replace('/attempt', '.attempt', regex=False) \ - .str.rsplit('/', n=1).str[-1] - - return data - - -def estimate_costs_per_task(data): - # columns after load_data(): - # ['Hours', 'nCPU', 'MaxCPU', 'PctCPU', 'TotMem', 'MaxMem', 'PctMem', 'TotDisk', 'MaxDisk', 'PctDisk', 'Task'] - # compute resource-hours : actual and with optimal settings based on maximum usage - data['TotCPUHour'] = data['nCPU'] * data['Hours'] - data['MaxCPUHour'] = data['MaxCPU'] * data['Hours'] - data['TotMemHour'] = data['TotMem'] * data['Hours'] - data['MaxMemHour'] = data['MaxMem'] * 
data['Hours'] - data['TotDiskHour'] = data['TotDisk'] * data['Hours'] - data['MaxDiskHour'] = data['MaxDisk'] * data['Hours'] - - # compute cost estimates : actual and with optimal resource settings based on maximum usage (per-task, so dynamic) - data['TotCPUCost'] = data['TotCPUHour'] * COST_CPU_HR - data['OptCPUCost'] = np.multiply( - np.fmax(data['MaxCPU'], MIN_CPU), data['Hours']) * COST_CPU_HR - data['TotMemCost'] = data['TotMemHour'] * COST_PER_GB_MEM_HR - data['OptMemCost'] = np.multiply( - np.fmax(data['MaxMem'], MIN_MEM_GB), data['Hours']) * COST_PER_GB_MEM_HR - data['TotDiskCost'] = np.multiply( - (data['TotDisk'] + BOOT_DISK_GB), data['Hours']) * COST_PER_GB_DISK_HR - data['OptDiskCost'] = np.multiply((np.fmax( - data['MaxDisk'], MIN_DISK_GB) + BOOT_DISK_GB), data['Hours']) * COST_PER_GB_DISK_HR - data['TotTaskCost'] = data['TotCPUCost'] + \ - data['TotMemCost'] + data['TotDiskCost'] - data['OptTaskCost'] = data['OptCPUCost'] + \ - data['OptMemCost'] + data['OptDiskCost'] - - data.sort_values(by='TotTaskCost', inplace=True, ascending=False) - return data - - -def estimate_costs_per_group(data): - # remove shard number, attempt number if present - data['TaskGroup'] = data['Task'].str.split('.').str[0] - groups = data['TaskGroup'].unique() - data_grouped = pd.DataFrame(columns=['Task', 'Hours', 'AvgCPU', 'MaxCPU', 'PctCPU', 'AvgMem', 'MaxMem', 'PctMem', - 'AvgDisk', 'MaxDisk', 'PctDisk', 'TotCPUHour', 'PeakCPUHour', 'TotMemHour', 'PeakMemHour', - 'TotDiskHour', 'PeakDiskHour', 'TotCPUCost', 'StaticCPUCost', 'DynCPUCost', 'TotMemCost', - 'StaticMemCost', 'DynMemCost', 'TotDiskCost', 'StaticDiskCost', 'DynDiskCost', 'TotCost', - 'StaticCost', 'DynCost']) - - for group in groups: - """ - columns of d: ['Task', 'Hours', 'nCPU', 'MaxCPU', 'PctCPU', 'TotMem', 'MaxMem', - 'PctMem', 'TotDisk', 'MaxDisk', 'PctDisk', 'TotCPUHour', 'MaxCPUHour', - 'TotMemHour', 'MaxMemHour', 'TotDiskHour', 'MaxDiskHour', 'TotCPUCost', - 'OptCPUCost', 'TotMemCost', 'OptMemCost', 'TotDiskCost', 'OptDiskCost', - 'TotTaskCost', 'OptTaskCost'] - """ - d = data.loc[data['TaskGroup'] == group] - hours = np.sum(d['Hours']) - max_cpu = np.nan if np.isnan( - d['MaxCPU']).all() else np.max(d['MaxCPU']) - max_mem = np.nan if np.isnan( - d['MaxMem']).all() else np.max(d['MaxMem']) - max_disk = np.nan if np.isnan( - d['MaxDisk']).all() else np.max(d['MaxDisk']) - group_data = { - 'Task': group, - 'Hours': hours, - 'AvgCPU': np.mean(d['nCPU']), - 'AvgMem': np.mean(d['TotMem']), - 'AvgDisk': np.mean(d['TotDisk']), - 'MaxCPU': max_cpu, - 'MaxMem': max_mem, - 'MaxDisk': max_disk, - 'PctCPU': np.nan if np.isnan(d['PctCPU']).all() else np.nanmax(d['PctCPU']), - 'PctMem': np.nan if np.isnan(d['PctMem']).all() else np.nanmax(d['PctMem']), - 'PctDisk': np.nan if np.isnan(d['PctDisk']).all() else np.nanmax(d['PctDisk']), - 'TotCPUHour': np.sum(d['TotCPUHour']), - 'TotMemHour': np.sum(d['TotMemHour']), - 'TotDiskHour': np.sum(d['TotDiskHour']), - 'PeakCPUHour': np.nan if np.isnan(d['MaxCPUHour']).all() else np.nanmax(d['MaxCPUHour']), - 'PeakMemHour': np.nan if np.isnan(d['MaxMemHour']).all() else np.nanmax(d['MaxMemHour']), - 'PeakDiskHour': np.nan if np.isnan(d['MaxDiskHour']).all() else np.nanmax(d['MaxDiskHour']), - 'TotCPUCost': np.sum(d['TotCPUCost']), - 'TotMemCost': np.sum(d['TotMemCost']), - 'TotDiskCost': np.sum(d['TotDiskCost']), - 'DynCPUCost': np.sum(d['OptCPUCost']), - 'DynMemCost': np.sum(d['OptMemCost']), - 'DynDiskCost': np.sum(d['OptDiskCost']), - 'StaticCPUCost': COST_CPU_HR * np.nanmax((max_cpu, MIN_CPU)) * 
hours, - 'StaticMemCost': COST_PER_GB_MEM_HR * np.nanmax((max_mem, MIN_MEM_GB)) * hours, - 'StaticDiskCost': COST_PER_GB_DISK_HR * (np.nanmax((max_disk, MIN_DISK_GB)) + BOOT_DISK_GB) * hours - } - group_data['TotCost'] = sum( - (group_data['TotCPUCost'], group_data['TotMemCost'], group_data['TotDiskCost'])) - group_data['StaticCost'] = sum( - (group_data['StaticCPUCost'], group_data['StaticMemCost'], group_data['StaticDiskCost'])) - group_data['DynCost'] = sum( - (group_data['DynCPUCost'], group_data['DynMemCost'], group_data['DynDiskCost'])) - - data_grouped = pd.concat([data_grouped, pd.DataFrame([group_data])], ignore_index=True) - - data_grouped.sort_values(by='TotCost', inplace=True, ascending=False) - return data_grouped - - -def get_out_file_path(output_base, output_end): - sep = "." - if basename(output_base) == "": - sep = "" - out_file = output_base + sep + output_end - return out_file - - -def write_data(data, out_file): - logging.info("Writing %s" % out_file) - data.to_csv(out_file, sep='\t', na_rep='NaN', index=False) - - -def do_simple_bar(data, xticks, path, bar_width=0.35, height=12, width=12, - xtitle='', ytitle='', title='', bottom_adjust=0, legend=[], - yscale='linear', sort_values=None): - num_groups = max([d.shape[0] for d in data]) - if sort_values is not None: - sort_indexes = np.flip(np.argsort(sort_values)) - else: - sort_indexes = np.arange(num_groups) - plt.figure(num=None, figsize=(width, height), - dpi=100, facecolor='w', edgecolor='k') - for i in range(len(data)): - if i < len(legend): - label = legend[i] - else: - label = "data" + str(i) - x = (np.arange(num_groups) * len(data) + i) * bar_width - plt.bar(x, data[i][sort_indexes], label=label) - x = (np.arange(num_groups) * len(data)) * bar_width - plt.xticks(x, [xticks[i] for i in sort_indexes], rotation='vertical') - plt.xlabel(xtitle) - plt.ylabel(ytitle) - plt.title(title) - plt.subplots_adjust(bottom=bottom_adjust) - plt.yscale(yscale) - plt.legend() - plt.savefig(path) - - -def create_graphs(data, out_file, semilog=False, num_samples=None): - logging.info("Writing %s" % out_file) - # drop rows with any NA values before making plot - data = data.loc[data.notna().all(axis=1)] - data.reset_index(drop=True, inplace=True) - if num_samples is not None: - data = data / num_samples - ytitle = "Cost ($/sample)" - title = "Estimated Cost Per Sample" - else: - ytitle = "Cost ($)" - title = "Estimated Total Cost" - - if semilog: - yscale = "log" - else: - yscale = "linear" - - do_simple_bar(data=[data["TotCost"], data["StaticCost"], data["DynCost"]], - xticks=data['Task'], - path=out_file, - bar_width=1, - height=8, - width=12, - xtitle="Task", - ytitle=ytitle, - title=title, - bottom_adjust=0.35, - legend=["Current", "Uniform", "Dynamic"], - yscale=yscale, - sort_values=data["TotCost"]) - - -def check_file_nonempty(f): - if not isfile(f): - raise RuntimeError("Required input file %s does not exist." % f) - elif getsize(f) == 0: - raise RuntimeError("Required input file %s is empty." 
% f) - - -# Main function -def main(): - parser = argparse.ArgumentParser() - parser.add_argument( - "log_summary_file", help="Path to log summary TSV from get_cromwell_resource_usage2.sh -u -r") - parser.add_argument("output_base", help="Output tsv file base path") - parser.add_argument("--overhead", help="Localization overhead in minutes") - parser.add_argument("--semilog", help="Plot semilog y", - action="store_true") - parser.add_argument( - "--plot-norm", help="Specify number of samples to normalize plots to per sample") - parser.add_argument("--log-level", - help="Specify level of logging information, ie. info, warning, error (not case-sensitive)", - required=False, default="INFO") - args = parser.parse_args() - - if not args.overhead: - overhead = DEFAULT_OVERHEAD_MIN - else: - overhead = float(args.overhead) - - if args.plot_norm: - plot_norm = int(args.plot_norm) - else: - plot_norm = None - - log_level = args.log_level - numeric_level = getattr(logging, log_level.upper(), None) - if not isinstance(numeric_level, int): - raise ValueError('Invalid log level: %s' % log_level) - logging.basicConfig(level=numeric_level, - format='%(levelname)s: %(message)s') - - log_file, output_base = args.log_summary_file, args.output_base - check_file_nonempty(log_file) - - data = load_data(log_file, overhead) - data = estimate_costs_per_task(data) - write_data(data, get_out_file_path(output_base, "all.tsv")) - grouped_data = estimate_costs_per_group(data) - write_data(grouped_data, get_out_file_path(output_base, "grouped.tsv")) - create_graphs(grouped_data, get_out_file_path( - output_base, "cost.png"), semilog=args.semilog, num_samples=plot_norm) - - -if __name__ == "__main__": - main() diff --git a/scripts/cromwell/analyze_resource_acquisition.py b/scripts/cromwell/analyze_resource_acquisition.py deleted file mode 100644 index 859e54466..000000000 --- a/scripts/cromwell/analyze_resource_acquisition.py +++ /dev/null @@ -1,530 +0,0 @@ -#!/bin/python - -import json -import argparse -import dateutil.parser -import datetime -import pandas as pd -import matplotlib.pyplot as plt -import matplotlib.ticker -from os.path import basename, isfile, getsize -import logging - -""" -Summary: Scrapes workflow metadata to analyze resource acquisition (VMs, CPU, RAM, disk memory), for the purpose - of understanding resource peaks and timing jobs to avoid hitting quotas. - -Usage: - python analyze_resource_acquisition workflow_metadata.json /path/to/output_basename [optional flags] - Optional flags: - --plot-title WorkflowName (WorkflowName will be prepended to plot title) - --save-table (to save a TSV file of the table used to create the plot) - --override-warning (to attempt to run script on workflow that raised a warning) - --log-level LEVEL (specify level of logging information to print, ie. INFO, WARNING, ERROR - not case-sensitive) - -Parameters: - workflow_metadata.json: path to Cromwell metadata file for workflow of interest - /path/to/output_basename: base output path, to which extensions will be appended for each output file, - ie. 
/output_dir/basename will yield /output_dir/basename.plot.png, /output_dir/basename.table.tsv, etc - or, /output_dir/ will yield plot.png, table.tsv, peaks.txt, etc - -Outputs: - - plot (png) of VMs, CPUs (total, preemptible, and nonpreemptible), RAM, and disk (HDD, SSD) acquisitioned over time - - (optional, with --save-table flag) table of resource acquisition over time for each type of resource listed above - - peak resource acquisition for each type of resource listed above (TSV file) - - number of non-preemptible VMs used (prints to stdout) and the names of the tasks that used them (TSV file, if any) - - number of tasks call-cached (prints to stdout), and their task names (TSV file, if any) -""" - -NUM_CACHED = 0 -CACHED = dict() -NUM_NONPREEMPTIBLE = 0 -NONPREEMPTIBLE_TASKS = dict() - - -def get_disk_info(metadata): - """ - Modified from: https://github.com/broadinstitute/dsde-pipelines/blob/develop/scripts/calculate_cost.py - Modified to return (hdd_size, ssd_size) - """ - if "runtimeAttributes" in metadata and "disks" in metadata['runtimeAttributes']: - boot_disk_gb = 0.0 - if "bootDiskSizeGb" in metadata['runtimeAttributes']: - boot_disk_gb = float( - metadata['runtimeAttributes']['bootDiskSizeGb']) - # Note - am lumping boot disk in with requested disk. Assuming boot disk is same type as requested. - # i.e. is it possible that boot disk is HDD when requested is SDD. - (name, disk_size, - disk_type) = metadata['runtimeAttributes']["disks"].split() - if disk_type == "HDD": - return float(disk_size) + boot_disk_gb, float(0) - elif disk_type == "SSD": - return float(0), float(disk_size) + boot_disk_gb - else: - return float(0), float(0) - else: - # we can't tell disk size in this case so just return nothing - return float(0), float(0) - - -def was_preemptible_vm(metadata, was_cached): - """ - Modified from: https://github.com/broadinstitute/dsde-pipelines/blob/develop/scripts/calculate_cost.py - """ - if was_cached: - return True # if call cached, not any type of VM, but don't inflate nonpreemptible count - elif "runtimeAttributes" in metadata and "preemptible" in metadata['runtimeAttributes']: - pe_count = int(metadata['runtimeAttributes']["preemptible"]) - attempt = int(metadata['attempt']) - - return attempt <= pe_count - else: - # we can't tell (older metadata) so conservatively return false - return False - - -def used_cached_results(metadata): - """ - Modified from: https://github.com/broadinstitute/dsde-pipelines/blob/develop/scripts/calculate_cost.py - """ - return "callCaching" in metadata and "hit" in metadata["callCaching"] and metadata["callCaching"]["hit"] - - -def calculate_start_end(call_info, override_warning=False, alias=None): - """ - Modified from: https://github.com/broadinstitute/dsde-pipelines/blob/develop/scripts/calculate_cost.py - """ - if 'jobId' in call_info: - job_id = call_info['jobId'].split('/')[-1] - if alias is None or alias == "": - alias = job_id - else: - alias += "." 
+ job_id - elif alias is None or alias == "": - alias = "NA" - - # get start (start time of VM start) & end time (end time of 'ok') according to metadata - start = None - end = None - - if 'executionEvents' in call_info: - for x in call_info['executionEvents']: - # ignore incomplete executionEvents (could be due to server restart or similar) - if 'description' not in x: - continue - y = x['description'] - - if 'backend' in call_info and call_info['backend'] == 'PAPIv2': - if y.startswith("PreparingJob"): - start = dateutil.parser.parse(x['startTime']) - if y.startswith("Worker released"): - end = dateutil.parser.parse(x['endTime']) - else: - if y.startswith("start"): - start = dateutil.parser.parse(x['startTime']) - if y.startswith("ok"): - end = dateutil.parser.parse(x['endTime']) - - # if we are preempted or if cromwell used previously cached results, we don't even get a start time from JES. - # if cromwell was restarted, the start time from JES might not have been written to the metadata. - # in either case, use the Cromwell start time which is earlier but not wrong. - if start is None: - start = dateutil.parser.parse(call_info['start']) - - # if we are preempted or if cromwell used previously cached results, we don't get an endTime from JES right now. - # if cromwell was restarted, the start time from JES might not have been written to the metadata. - # in either case, use the Cromwell end time which is later but not wrong - if end is None: - if 'end' in call_info: - end = dateutil.parser.parse(call_info['end']) - elif override_warning: - logging.warning( - "End time not found, omitting job {}".format(alias)) - end = start - else: - raise RuntimeError((f"End time not found for job {alias} (may be running or have been aborted)." - " Run again with --override-warning to continue anyway and omit the job.")) - - return start, end - - -def get_mem_cpu(m): - """ - Modified from: https://github.com/broadinstitute/dsde-pipelines/blob/develop/scripts/calculate_cost.py - """ - cpu = 'na' - memory = 'na' - if 'runtimeAttributes' in m: - if 'cpu' in m['runtimeAttributes']: - cpu = int(m['runtimeAttributes']['cpu']) - if 'memory' in m['runtimeAttributes']: - mem_str = m['runtimeAttributes']['memory'] - memory = float(mem_str[:mem_str.index(" ")]) - return cpu, memory - - -def add_label_to_alias(alias, labels): - # In alias, track hierarchy of workflow/task up to current task nicely without repetition - if alias is None: - alias = "" - to_add = "" - if 'wdl-call-alias' in labels: - to_add = labels['wdl-call-alias'] - elif 'wdl-task-name' in labels: - to_add = labels['wdl-task-name'] - if to_add != "" and not alias.endswith(to_add): - if alias != "" and alias[-1] != ".": - alias += "." - alias += to_add - - return alias - - -def get_call_alias(alias, call): - # In call_alias, track hierarchy of workflow/task up to current call nicely without repetition - if alias is None: - alias = "" - call_split = call.split('.') - call_name = call - if alias.endswith(call_split[0]): - call_name = call_split[1] - call_alias = alias - if call_alias != "" and call_alias[-1] != ".": - call_alias += "." 
- call_alias += call_name - - return call_alias - - -def update_nonpreemptible_counters(alias): - global NUM_NONPREEMPTIBLE - global NONPREEMPTIBLE_TASKS - NUM_NONPREEMPTIBLE += 1 - if alias in NONPREEMPTIBLE_TASKS: - NONPREEMPTIBLE_TASKS[alias] += 1 - else: - NONPREEMPTIBLE_TASKS[alias] = 1 - - -def update_cached_counters(alias): - global CACHED - global NUM_CACHED - NUM_CACHED += 1 - if alias in CACHED: - CACHED[alias] += 1 - else: - CACHED[alias] = 1 - - -def get_calls(m, override_warning=False, alias=None): - """ - Modified from download_monitoring_logs.py script by Mark Walker - https://github.com/broadinstitute/gatk-sv/blob/master/scripts/cromwell/download_monitoring_logs.py - """ - if isinstance(m, list): - call_metadata = [] - for m_shard in m: - call_metadata.extend( - get_calls(m_shard, override_warning, alias=alias)) - return call_metadata - - if 'labels' in m: - alias = add_label_to_alias(alias, m['labels']) - - call_metadata = [] - if 'calls' in m: - for call in m['calls']: - # Skips scatters that don't contain calls - if '.' not in call: - continue - call_alias = get_call_alias(alias, call) - # recursively get metadata - call_metadata.extend( - get_calls(m['calls'][call], override_warning, alias=call_alias)) - - if 'subWorkflowMetadata' in m: - call_metadata.extend( - get_calls(m['subWorkflowMetadata'], override_warning, alias=alias)) - - # in a call - if alias and ('stderr' in m): - start, end = calculate_start_end(m, override_warning, alias) - - cpu, memory = get_mem_cpu(m) - - cached = used_cached_results(m) - - preemptible = was_preemptible_vm(m, cached) - preemptible_cpu = 0 - nonpreemptible_cpu = 0 - if preemptible: - preemptible_cpu = cpu - else: - nonpreemptible_cpu = cpu - - hdd_size, ssd_size = get_disk_info(m) - - call_metadata.append((start, 1, cpu, preemptible_cpu, - nonpreemptible_cpu, memory, hdd_size, ssd_size)) - call_metadata.append((end, -1, -1 * cpu, -1 * preemptible_cpu, -1 * nonpreemptible_cpu, -1 * memory, -1 * hdd_size, - -1 * ssd_size)) - if not preemptible: - update_nonpreemptible_counters(alias) - - if cached: - update_cached_counters(alias) - - return call_metadata - - -def check_workflow_valid(metadata, metadata_file, override_warning): - # these errors cannot be overcome - if 'status' not in metadata: - raise RuntimeError( - "Incomplete metadata input file %s. File lacks workflow status field." % metadata_file) - # Unrecognized workflow ID failure - unable to download metadata - if metadata['status'] == "fail": - err_msg = "Workflow metadata download failure." - if 'message' in metadata: - err_msg += " Message: " + metadata['message'] - raise RuntimeError(err_msg) - - # these errors may be able to be overcome for partial output - found_retryable_error = False - if metadata['status'] == "Failed": - logging.warning( - "Workflow failed, which is likely to impact plot accuracy.") - found_retryable_error = True - for event in metadata['workflowProcessingEvents']: - if event['description'] == "Released": - logging.warning( - "Server was interrupted during workflow execution, which is likely to impact plot accuracy.") - found_retryable_error = True - break - if found_retryable_error: - if override_warning: - logging.info("Override_warning=TRUE. Proceeding with caution.") - else: - raise RuntimeError(("One or more retryable errors encountered (see logging info for warnings). 
" - "To attempt to proceed anyway, re-run the script with the --override-warning flag.")) - - -def get_call_metadata(metadata_file, override_warning=False): - """ - Based on: https://github.com/broadinstitute/gatk-sv/blob/master/scripts/cromwell/download_monitoring_logs.py - """ - metadata = json.load(open(metadata_file, 'r')) - check_workflow_valid(metadata, metadata_file, override_warning) - colnames = ['timestamp', 'vm_delta', 'cpu_all_delta', 'cpu_preemptible_delta', 'cpu_nonpreemptible_delta', - 'memory_delta', 'hdd_delta', 'ssd_delta'] - - call_metadata = get_calls(metadata, override_warning) - if len(call_metadata) == 0: - raise RuntimeError("No calls in workflow metadata.") - call_metadata = pd.DataFrame(call_metadata, columns=colnames) - - return call_metadata - - -def transform_call_metadata(call_metadata): - """ - Based on: https://github.com/broadinstitute/dsde-pipelines/blob/master/scripts/quota_usage.py - """ - call_metadata = call_metadata.sort_values(by='timestamp') - # make timestamps start from 0 by subtracting minimum (at index 0 after sorting) - call_metadata['timestamp_zero'] = call_metadata['timestamp'] - \ - call_metadata.timestamp.iloc[0] - # get timedelta in seconds because plot labels won't format correctly otherwise - call_metadata['seconds'] = call_metadata['timestamp_zero'].dt.total_seconds() - - call_metadata['vm'] = call_metadata.vm_delta.cumsum() - call_metadata['cpu_all'] = call_metadata.cpu_all_delta.cumsum() - call_metadata['cpu_preemptible'] = call_metadata.cpu_preemptible_delta.cumsum() - call_metadata['cpu_nonpreemptible'] = call_metadata.cpu_nonpreemptible_delta.cumsum() - call_metadata['memory'] = call_metadata.memory_delta.cumsum() - call_metadata['ssd'] = call_metadata.ssd_delta.cumsum() - call_metadata['hdd'] = call_metadata.hdd_delta.cumsum() - - return call_metadata - - -def plot_resources_time(df, title_name, output_name): - """ - Modified from: https://github.com/broadinstitute/dsde-pipelines/blob/master/scripts/quota_usage.py - """ - logging.info("Writing " + output_name) - colors = { - "vm": "#006FA6", # blue - "cpu_all": "black", - "cpu_preemptible": "#10a197", # turquoise - "cpu_nonpreemptible": "#A30059", # dark pink - "memory": "#FF4A46", # coral red - "hdd": "#72418F", # purple - "ssd": "#008941", # green - } - LABEL_SIZE = 17 - TITLE_SIZE = 20 - TICK_SIZE = 15 - - fig, ax = plt.subplots(4, 1, figsize=(14, 26), sharex=True) - ax[0].set_title( - title_name + "Resource Acquisition Over Time", fontsize=TITLE_SIZE) - - ax[0].plot(df['seconds'], df['vm'], color=colors["vm"]) - ax[0].set_ylabel("VMs", fontsize=LABEL_SIZE) - plt.setp(ax[0].get_yticklabels(), fontsize=TICK_SIZE) - - ax[1].plot(df['seconds'], df['cpu_all'], - color=colors["cpu_all"], linewidth=2, label="All") - ax[1].plot(df['seconds'], df['cpu_preemptible'], - color=colors["cpu_preemptible"], linestyle="dashed", label="Preemptible") - ax[1].plot(df['seconds'], df['cpu_nonpreemptible'], - color=colors["cpu_nonpreemptible"], linestyle="dashed", label="Non-preemptible") - ax[1].set_ylabel("CPU Cores", fontsize=LABEL_SIZE) - plt.setp(ax[1].get_yticklabels(), fontsize=TICK_SIZE) - ax[1].legend(loc="upper right", title="CPU Types", - fontsize=TICK_SIZE, title_fontsize=TICK_SIZE) - - ax[2].plot(df['seconds'], df['memory'], color=colors["memory"]) - ax[2].set_ylabel("RAM (GiB)", fontsize=LABEL_SIZE) - plt.setp(ax[2].get_yticklabels(), fontsize=TICK_SIZE) - - ax[3].plot(df['seconds'], df['hdd'], color=colors["hdd"], label="HDD") - ax[3].plot(df['seconds'], df['ssd'], 
color=colors["ssd"], label="SSD") - ax[3].set_ylabel("Disk Memory (GiB)", fontsize=LABEL_SIZE) - plt.setp(ax[3].get_yticklabels(), fontsize=TICK_SIZE) - ax[3].legend(loc="upper right", title="Disk Types", - fontsize=TICK_SIZE, title_fontsize=TICK_SIZE) - - formatter = matplotlib.ticker.FuncFormatter( - lambda x, pos: str(datetime.timedelta(seconds=x))) - ax[3].xaxis.set_major_formatter(formatter) - plt.setp(ax[3].get_xticklabels(), rotation=15, fontsize=TICK_SIZE) - ax[3].set_xlabel("Time", fontsize=LABEL_SIZE) - - fig.savefig(output_name, bbox_inches='tight') - - -def write_resources_time_table(call_metadata, table_file): - logging.info("Writing " + table_file) - call_metadata.to_csv( - table_file, - columns=["timestamp", "seconds", "vm", "cpu_all", - "cpu_preemptible", "cpu_nonpreemptible", "memory", "hdd", "ssd"], - sep='\t', - index=False, - date_format='%Y-%m-%dT%H:%M%:%SZ' - ) - - -def write_peak_usage(m, peak_file): - logging.info("Writing " + peak_file) - with open(peak_file, 'w') as out: - out.write("peak_vms\t" + str(max(m['vm'])) + "\n") - out.write("peak_cpu_all\t" + str(max(m['cpu_all'])) + "\n") - out.write("peak_cpu_preemptible\t" + - str(max(m['cpu_preemptible'])) + "\n") - out.write("peak_cpu_nonpreemptible\t" + - str(max(m['cpu_nonpreemptible'])) + "\n") - out.write("peak_ram_gib\t" + "{:.2f}".format(max(m['memory'])) + "\n") - out.write("peak_disk_hdd_gib\t" + str(max(m['hdd'])) + "\n") - out.write("peak_disk_ssd_gib\t" + str(max(m['ssd'])) + "\n") - - -def write_cached_warning(cached_file): - global CACHED - global NUM_CACHED - if NUM_CACHED > 0: - logging.info("%d cached task(s) found, writing task(s) to %s." % - (NUM_CACHED, cached_file)) - with open(cached_file, 'w') as cached_out: - cached_out.write("#task_name\tnum_cached\n") - cached_out.write("all_tasks\t%d\n" % NUM_CACHED) - cached_out.write("\n".join( - [x + '\t' + str(CACHED[x]) for x in sorted(list(CACHED.keys()))]) + "\n") - else: - logging.info("0 cached tasks found.") - - -def write_nonpreemptible_vms(vms_file): - global NUM_NONPREEMPTIBLE - global NONPREEMPTIBLE_TASKS - if NUM_NONPREEMPTIBLE > 0: - logging.info("%d non-preemptible VM(s) found, writing task(s) to %s." % - (NUM_NONPREEMPTIBLE, vms_file)) - with open(vms_file, 'w') as vms_out: - vms_out.write("#task_name\tnum_nonpreemptible\n") - vms_out.write("all_tasks\t%d\n" % NUM_NONPREEMPTIBLE) - vms_out.write("\n".join([x + '\t' + str(NONPREEMPTIBLE_TASKS[x]) - for x in sorted(list(NONPREEMPTIBLE_TASKS.keys()))]) + '\n') - else: - logging.info("0 non-preemptible VMs found.") - - -def check_file_nonempty(f): - if not isfile(f): - raise RuntimeError( - "Required metadata input file %s does not exist." % f) - elif getsize(f) == 0: - raise RuntimeError("Required metadata input file %s is empty." 
% f) - - -# Main function -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("workflow_metadata", - help="Workflow metadata JSON file") - parser.add_argument("output_base", help="Output directory + basename") - parser.add_argument("--plot-title", - help="Provide workflow name for plot title: Resource Acquisition Over Time", - required=False, default="") - parser.add_argument("--override-warning", - help="Execute script despite workflow warning (server interrupted, workflow failed, etc.), \ - which may impact plot accuracy", - required=False, default=False, action='store_true') - parser.add_argument("--save-table", help="Save TSV copy of resources over time table used to make plot", - required=False, default=False, action='store_true') - parser.add_argument("--log-level", - help="Specify level of logging information, ie. info, warning, error (not case-sensitive)", - required=False, default="INFO") - args = parser.parse_args() - - # get args as variables - metadata_file, output_base = args.workflow_metadata, args.output_base # required args - plt_title, override_warning, save_table, log_level = args.plot_title, args.override_warning, args.save_table, args.log_level # optional args - - # set attributes based on input parameters - numeric_level = getattr(logging, log_level.upper(), None) - if not isinstance(numeric_level, int): - raise ValueError('Invalid log level: %s' % log_level) - logging.basicConfig(level=numeric_level, - format='%(levelname)s: %(message)s') - if plt_title != "": - plt_title += " " - sep = "." - if basename(output_base) == "": - sep = "" - - check_file_nonempty(metadata_file) - call_metadata = get_call_metadata(metadata_file, override_warning) - call_metadata = transform_call_metadata(call_metadata) - - cached_file = output_base + sep + "cached.tsv" - write_cached_warning(cached_file) - - vms_file = output_base + sep + "vms_file.tsv" - write_nonpreemptible_vms(vms_file) - - plot_file = output_base + sep + "plot.png" - plot_resources_time(call_metadata, plt_title, plot_file) - - if save_table: - table_file = output_base + sep + "table.tsv" - write_resources_time_table(call_metadata, table_file) - - peak_file = output_base + sep + "peaks.tsv" - write_peak_usage(call_metadata, peak_file) - - -if __name__ == "__main__": - main() diff --git a/scripts/cromwell/copy_cromwell_results.sh b/scripts/cromwell/copy_cromwell_results.sh deleted file mode 100755 index 7d1c8473c..000000000 --- a/scripts/cromwell/copy_cromwell_results.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/bin/bash -# USAGE: copy_cromwell_results.sh WORKFLOW_ID [DESTINATION_PATH] [SUB_FOLDER] -# Copy all output files from cromwell workflow to flat folder at -# destination path. -# -If cromshell is not available on the path as "cromshell" -# then define the ENV variable CROMSHELL with the appropriate path. 
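# For example, an illustrative invocation (the workflow id, bucket, and
# sub-folder below are placeholders, not values from this repository):
#   copy_cromwell_results.sh 01234567-89ab-cdef-0123-456789abcdef \
#       gs://my-results-bucket/benchmarks manta_run1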
-# -If DESTINATION_PATH is not specified, it is assumed to be -# "gs://broad-sv-dev-data/caller_benchmark/callers" -# -if SUB_FOLDER is not defined, copy directly to DESTINATION_PATH -set -Eeuo pipefail - -WORKFLOW_ID=$1 -DESTINATION_PATH=${DESTINATION_PATH:-"gs://broad-sv-dev-data/caller_benchmark/callers"} -DESTINATION_PATH=${2:-$DESTINATION_PATH} -SUB_FOLDER=${3:-""} - -if [ -n "$SUB_FOLDER" ]; then - GCS_OUTPUT_DIR="${DESTINATION_PATH%/}/${SUB_FOLDER%/}" -else - GCS_OUTPUT_DIR="${DESTINATION_PATH%/}" -fi - -# find the outputs -CROMSHELL=${CROMSHELL:-cromshell} -$CROMSHELL -t 200 metadata $WORKFLOW_ID > ./metadata.json -OUTPUTS=$(jq '.outputs | .[]' ./metadata.json \ - | sed -e 's/ //g' -e 's/^"//' -e 's/,$//' -e 's/"$//' \ - | grep -v '\]\|\[' | grep -v "null" \ - | sort) -# copy the outputs to the requested GCS location -printf "$OUTPUTS\n./metadata.json\n" | gsutil -m cp -I "$GCS_OUTPUT_DIR/" diff --git a/scripts/cromwell/copy_outputs.py b/scripts/cromwell/copy_outputs.py deleted file mode 100644 index 3143c812f..000000000 --- a/scripts/cromwell/copy_outputs.py +++ /dev/null @@ -1,71 +0,0 @@ -#!/bin/python - -import sys -import json -import os.path -import argparse -from google.cloud import storage - -# Synopsis: -# Copies workflow outputs needed for downstream processing to a destination bucket. -# Author: Mark Walker (markw@broadinstitute.org) - - -def get_uris(metadata, dest_prefix): - if 'workflowName' not in metadata: - raise ValueError("Workflow name not found. Check metadata file.") - outputs = metadata['outputs'] - for source_uri in outputs.values(): - if source_uri is not None and source_uri.startswith("gs://"): - dest_filename = os.path.basename(source_uri) - dest_uri = os.path.join(dest_prefix, dest_filename) - yield source_uri, dest_uri - - -def copy_blob(storage_client, bucket_name, blob_name, destination_bucket_name, destination_blob_name): - source_bucket = storage_client.bucket(bucket_name) - source_blob = source_bucket.blob(blob_name) - destination_bucket = storage_client.bucket(destination_bucket_name) - destination_blob = destination_bucket.blob(destination_blob_name) - source_uri = f"gs://{source_bucket.name}/{source_blob.name}" - destination_uri = f"gs://{destination_bucket.name}/{destination_blob_name}" - if destination_blob.exists(): - sys.stderr.write( - f"Target {destination_uri} exists, cautiously refusing to overwrite. Aborting...\n") - sys.exit(1) - sys.stderr.write(f"Copying {source_uri}...") - (token, bytes_rewritten, total_bytes) = destination_blob.rewrite(source=source_blob) - while token is not None: - (token, bytes_rewritten, total_bytes) = destination_blob.rewrite( - source=source_blob, token=token) - size_kb = int(bytes_rewritten / 1024) - sys.stderr.write(f"done ({size_kb} KB)\n") - - -def copy_uri(source_uri, dest_uri, storage_client): - def _parse_uri(uri): - tokens = uri.split('/') - bucket_name = tokens[2] - bucket_object = '/'.join(tokens[3:]) - return bucket_name, bucket_object - source_bucket_name, source_blob_name = _parse_uri(source_uri) - dest_bucket_name, dest_blob_name = _parse_uri(dest_uri) - copy_blob(storage_client, source_bucket_name, - source_blob_name, dest_bucket_name, dest_blob_name) - - -# Main function -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("--metadata", help="Workflow metadata JSON file", required=True) - parser.add_argument("--dest", help="Destination GCS URI (e.g. 
\"gs://my-bucket/output\")", required=True) - args = parser.parse_args() - metadata = json.load(open(args.metadata, 'r')) - output_uris = get_uris(metadata, args.dest) - client = storage.Client() - for source_uri, dest_uri in output_uris: - copy_uri(source_uri, dest_uri, client) - - -if __name__ == "__main__": - main() diff --git a/scripts/cromwell/cromwell_monitoring_script.sh b/scripts/cromwell/cromwell_monitoring_script.sh deleted file mode 100755 index 3570fd62b..000000000 --- a/scripts/cromwell/cromwell_monitoring_script.sh +++ /dev/null @@ -1,290 +0,0 @@ -#!/bin/bash -# NOTE: this script is intended to be placed in google cloud storage -# and invoked by adding the following line to your cromwell workflow -# options: -# "monitoring_script": "gs://bucket/path/to/cromwell_monitoring_script.sh" -# Upon task completion "monitoring.log" will be added to the appropriate -# cloud storage folder. -set -Eeuo pipefail - -MONITOR_MOUNT_POINT=${MONITOR_MOUNT_POINT:-"/cromwell_root"} -SLEEP_TIME=${SLEEP_TIME:-"10"} - -function getCpuUsage() { - # get the summary cpu statistics (i.e. for all cpus) since boot - # get the numeric values in an array, dropping the first field (the - # string, "cpu") - CPU_TIMES=(`sed -n 's/^cpu\s//p' /proc/stat`) - # idle time (in system units) is the 3rd numeric field - IDLE_TIME=${CPU_TIMES[3]} - # total cpu time is sum of all fields - TOTAL_TIME=0 - for T in ${CPU_TIMES[@]}; do - ((TOTAL_TIME += T)) - done - - # get the previous times from temp file - read PREVIOUS_IDLE PREVIOUS_TOTAL < $TEMP_CPU - - # write current times to temp file - echo "$IDLE_TIME $TOTAL_TIME" > $TEMP_CPU - - # get the difference in idle and total times since the previous - # update, and report the usage as: non-idle time as a percentage - # of total time - awk -v IDLE=$((IDLE_TIME-PREVIOUS_IDLE)) \ - -v TOTAL=$((TOTAL_TIME-PREVIOUS_TOTAL)) \ - 'BEGIN { printf "%.1f%%", 100 * (1 - IDLE / TOTAL) }' -} - -function getMem() { - # get desired memory value from /proc/meminfo, in GiB, and also - # as a percentage of total - # argument is the label of the desired memory value - cat /proc/meminfo \ - | awk -v MEM_FIELD="$1" '{ - f[substr($1, 1, length($1)-1)] = $2 - } END { - printf "%.2f GiB", f[MEM_FIELD] / 1048576 - }' -} - -function getMemUnavailable() { - # get unavailable memory from /proc/meminfo, in GiB - cat /proc/meminfo \ - | awk '{ - f[substr($1, 1, length($1)-1)] = $2 - } END { - - if("MemAvailable" in f) { - mem_available = f["MemAvailable"] - } else { - mem_available = f["MemFree"] + f["Buffers"] + f["Cached"] - } - mem_in_use = f["MemTotal"] - mem_available - printf "%.2f GiB %.1f%%", mem_in_use / 1048576, 100 * mem_in_use / f["MemTotal"] - }' -} - -# old version using "free -m" are kept in case a container somehow has -# weird values in /proc/meminfo -function getMem_with_free() { - # get memory info from "free" command. Convert to float in GB. - # First argument is desired row of output table. - # Second argument is desired column. 
- MEM_ROW=$(echo "$1" | awk '{print tolower($1)}') - MEM_COLUMN=$(echo "$2" | awk '{print tolower($1)}') - free -m | awk -v MEM_ROW=$MEM_ROW -v MEM_COLUMN=$MEM_COLUMN \ - 'NR=1 { - for(i=1; i<=NF; i++) { f[tolower($i)]=NF+1-i } - } - { - regex="^"MEM_ROW - if(tolower($1) ~ regex) { - print $(NF+1-f[MEM_COLUMN])/1024 " GiB" - } - }' -} - -# old version using "free -m" are kept in case a container somehow has -# weird values in /proc/meminfo -function getMemUnavailable_using_free() { - # get memory that is in active use (not just cached) from "free" - # command. Convert to float in GiB, followed by percent of total. - # NOTE: weird computation with awk due to variety of output from - # free on different systems. Rows and columns differ, and on some - # systems the desired quantity is used "used" memory, on most it's - # "used" - "buffers" - "cached". If "buffers" and "cached" don't - # exist, then awk will subtract 0 so the correct result is returned. - free -m \ - | awk '\ - NR=1 { - for(i=1; i<=NF; i++) { f[tolower($i)]=NF+1-i } - } - { - if(tolower($1) ~ "^mem") { - IN_USE=($(NF+1-f["used"]) - $(NF+1-f["buffers"]) - $(NF+1-f["cached"])) - printf "%.3f GiB %.1f%%", IN_USE/1024, 100*IN_USE/$(NF+1-f["total"]) - } - }' -} - - -function getDisk() { - # get information about disk usage from "df" command. - DISK_COLUMN=$(echo "$1" | awk '{print tolower($1)}') - MOUNT_POINT=$2 - # extract desired value - VALUE=$(\ - df -h "$MOUNT_POINT" \ - | sed 's/Mounted on/Mounted-on/' \ - | awk -v DISK_COLUMN=$DISK_COLUMN ' - FNR==1 { - NF_HEADER=NF - for(i=1; i<=NF; i++) { f[tolower($i)]=NF-i } - } - FNR>1 { - FIELD_NUM=NF-f[DISK_COLUMN] - if(FIELD_NUM > 0) { - VALUE=$(FIELD_NUM) - print VALUE - } else if(f[DISK_COLUMN] == NF_HEADER-1 && NF == 1) { - VALUE=$(1) - print VALUE - } - }' \ - ) - # If value is a number follwed by letters, it is a value with units - # and needs to be converted. Otherwise just print value - if [[ "$VALUE" =~ [0-9.]+[A-z]+ ]]; then - echo "$VALUE"\ - | sed -E 's/([0-9.]*)([^0-9.]*)/\1 \2/' \ - | awk '{ - UNIT=substr($2, 1, 1) - if(UNIT == "T") { - SCALE=2^10 - } else if(UNIT == "G") { - SCALE=1 - } else if(UNIT == "M") { - SCALE=2^-10 - } else if(UNIT == "K") { - SCALE=2^-20 - } else if(UNIT == "B") { - SCALE=2^-30 - } else { - SCALE=1 - } - printf "%.3f GiB", $1 * SCALE - }' - else - echo "$VALUE" - fi -} - -function findBlockDevice() { - MOUNT_POINT=$1 - FILESYSTEM=$(grep -E "$MOUNT_POINT\s" /proc/self/mounts \ - | awk '{print $1}') - DEVICE_NAME=$(basename "$FILESYSTEM") - FS_IN_BLOCK=$(find -L /sys/block/ -mindepth 2 -maxdepth 2 -type d \ - -name "$DEVICE_NAME") - if [ -n "$FS_IN_BLOCK" ]; then - # found path to the filesystem in the block devices. get the - # block device as the parent dir - dirname "$FS_IN_BLOCK" - elif [ -d "/sys/block/$DEVICE_NAME" ]; then - # the device is itself a block device - echo "/sys/block/$DEVICE_NAME" - else - # couldn't find, possibly mounted by mapper. - # look for block device that is just the name of the symlinked - # original file. if not found, echo empty string (no device found) - BLOCK_DEVICE=$(ls -l "$FILESYSTEM" 2>/dev/null \ - | cut -d'>' -f2 \ - | xargs basename 2>/dev/null \ - || echo) - if [[ -z "$BLOCK_DEVICE" ]]; then - 1>&2 echo "Unable to find block device for filesystem $FILESYSTEM." - if [[ -d /sys/block/sdb ]] && ! grep -qE "^/dev/sdb" /etc/mtab; then - 1>&2 echo "Guessing present but unused sdb is the correct block device." - echo "/sys/block/sdb" - else - 1>&2 echo "Disk IO will not be monitored." 
- fi - fi - fi -} - -function handle_integer_wrap() { - if [ $1 -ge 0 ]; then - echo $1 - else - WRAPPED=$1 - echo "$((WRAPPED + 2**30))" - fi -} - - - -function getBlockDeviceIO() { - # get read and write IO rate by looking at appropriate block device - STAT_FILE="$1" - if [[ -f "$STAT_FILE" ]]; then - # get IO stats as comma-separated list to extract 3rd and 7th fields - STATS=$(sed -E 's/[[:space:]]+/,/g' $STAT_FILE | sed -E 's/^,//'\ - | cut -d, -f3,7 | sed -E 's/,/ /g') - # get results of previous poll - read OLD_READ OLD_WRITE < $TEMP_IO - # save new poll results - read READ_SECTORS WRITE_SECTORS <<<$STATS - echo "$READ_SECTORS $WRITE_SECTORS" > $TEMP_IO - # update read and write sectors as difference since previous poll - READ_SECTORS=$(handle_integer_wrap $((READ_SECTORS - OLD_READ))) - WRITE_SECTORS=$(handle_integer_wrap $((WRITE_SECTORS - OLD_WRITE))) - - # output change in read/write sectors in kiB/s - echo "$READ_SECTORS $WRITE_SECTORS" \ - | awk -v T=$SLEEP_TIME -v B=$SECTOR_BYTES \ - '{ printf "%.3f MiB/s %.3f MiB/s", $1*B/T/1048576, $2*B/T/1048576 }' - else - echo "N/A MiB/s N/A MiB/s" - fi -} - - -function runtimeInfo() { - echo [$(date)] - echo \* CPU usage: $(getCpuUsage) - echo \* Memory usage: $(getMemUnavailable) - echo \* Disk usage: $(getDisk Used $MONITOR_MOUNT_POINT) $(getDisk Use% $MONITOR_MOUNT_POINT) - echo \* Read/Write IO: $(getBlockDeviceIO "$BLOCK_DEVICE_STAT_FILE") -} - -# print out header info -echo ================================== -echo =========== MONITORING =========== -echo ================================== -echo --- General Information --- -echo \#CPU: $(nproc) -echo Total Memory: $(getMem MemTotal) -echo Total Disk space: $(getDisk Size "$MONITOR_MOUNT_POINT") -echo -echo --- Runtime Information --- - - -# make a temp file to store io information, remove it on exit -TEMP_IO=$(mktemp "${TMPDIR:-/tmp/}$(basename $0).XXXXXXXXXXXX") -# make a temp file to store cpu information, remove it on exit -# remove temp files on exit -TEMP_CPU=$(mktemp "${TMPDIR:-/tmp/}$(basename $0).XXXXXXXXXXXX") -trap "rm -f $TEMP_IO $TEMP_CPU" EXIT - - -# find the block device -BLOCK_DEVICE=$(findBlockDevice "$MONITOR_MOUNT_POINT") -if [[ -z "$BLOCK_DEVICE" ]] \ - || [[ ! -f "$BLOCK_DEVICE/queue/hw_sector_size" ]]; then - # no block device found, can't get IO info - SECTOR_BYTES=0 - BLOCK_DEVICE_STAT_FILE="" -else - SECTOR_BYTES=$(cat "$BLOCK_DEVICE/queue/hw_sector_size") - BLOCK_DEVICE_STAT_FILE="$BLOCK_DEVICE/stat" -fi - - -# since getBlockDeviceIO looks at differences in stat file, run the -# update so the first reported update has a sensible previous result to -# compare to -echo "0 0" > $TEMP_IO -getBlockDeviceIO "$BLOCK_DEVICE_STAT_FILE" > /dev/null - -# same thing for getCpuUsage -echo "0 0" > $TEMP_CPU -getCpuUsage > /dev/null - - -while true; do - runtimeInfo - sleep $SLEEP_TIME -done diff --git a/scripts/cromwell/cromwell_monitoring_script2.sh b/scripts/cromwell/cromwell_monitoring_script2.sh deleted file mode 100755 index d23119486..000000000 --- a/scripts/cromwell/cromwell_monitoring_script2.sh +++ /dev/null @@ -1,254 +0,0 @@ -#!/bin/bash -# NOTE: this script is intended to be placed in google cloud storage -# and invoked by adding the following line to your cromwell workflow -# options: -# "monitoring_script": "gs://bucket/path/to/cromwell_monitoring_script2.sh" -# Upon task completion "monitoring.log" will be added to the appropriate -# cloud storage folder. 
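# As an illustrative sketch, a minimal Cromwell workflow-options file wiring
# in this script could look roughly like the following (the gs:// path is a
# placeholder for wherever a copy of the script is actually staged):
#   {
#     "monitoring_script": "gs://my-bucket/scripts/cromwell_monitoring_script2.sh"
#   }
# The options file is then supplied to Cromwell when the workflow is submitted,
# and each task's monitoring.log ends up alongside its other outputs in cloud storage.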
-set -Eeuo pipefail - -MONITOR_MOUNT_POINT=${MONITOR_MOUNT_POINT:-"/cromwell_root"} -SLEEP_TIME=${SLEEP_TIME:-"10"} - -function getCpuUsage() { - # get the summary cpu statistics (i.e. for all cpus) since boot - # get the numeric values in an array, dropping the first field (the - # string, "cpu") - CPU_TIMES=($(sed -n 's/^cpu\s//p' /proc/stat)) - # idle time (in system units) is the 3rd numeric field - IDLE_TIME=${CPU_TIMES[3]} - # total cpu time is sum of all fields - TOTAL_TIME=0 - for T in "${CPU_TIMES[@]}"; do - ((TOTAL_TIME += T)) - done - - # get the previous times from temp file - read PREVIOUS_IDLE PREVIOUS_TOTAL < $TEMP_CPU - - # write current times to temp file - echo "$IDLE_TIME $TOTAL_TIME" > $TEMP_CPU - - # get the difference in idle and total times since the previous - # update, and report the usage as: non-idle time as a percentage - # of total time - awk -v IDLE=$((IDLE_TIME-PREVIOUS_IDLE)) \ - -v TOTAL=$((TOTAL_TIME-PREVIOUS_TOTAL)) \ - 'BEGIN { printf "%.1f", 100 * (1 - IDLE / TOTAL) }' -} - -function getMem() { - # get desired memory value from /proc/meminfo, in GiB, and also - # as a percentage of total - # argument is the label of the desired memory value - cat /proc/meminfo \ - | awk -v MEM_FIELD="$1" '{ - f[substr($1, 1, length($1)-1)] = $2 - } END { - printf "%.2f GiB", f[MEM_FIELD] / 1048576 - }' -} - -function getMemUnavailable() { - # get unavailable memory from /proc/meminfo, in GiB - cat /proc/meminfo \ - | awk '{ - f[substr($1, 1, length($1)-1)] = $2 - } END { - if("MemAvailable" in f) { - mem_available = f["MemAvailable"] - } else { - mem_available = f["MemFree"] + f["Buffers"] + f["Cached"] - } - mem_in_use = f["MemTotal"] - mem_available - printf "%.2f\t%.1f", mem_in_use / 1048576, 100 * mem_in_use / f["MemTotal"] - }' -} - -function getDisk() { - # get information about disk usage from "df" command. - DISK_COLUMN=$(echo "$1" | awk '{print tolower($1)}') - MOUNT_POINT=$2 - # extract desired value - VALUE=$(\ - df -h "$MOUNT_POINT" \ - | sed 's/Mounted on/Mounted-on/' \ - | awk -v DISK_COLUMN=$DISK_COLUMN ' - FNR==1 { - NF_HEADER=NF - for(i=1; i<=NF; i++) { f[tolower($i)]=NF-i } - } - FNR>1 { - FIELD_NUM=NF-f[DISK_COLUMN] - if(FIELD_NUM > 0) { - VALUE=$(FIELD_NUM) - print VALUE - } else if(f[DISK_COLUMN] == NF_HEADER-1 && NF == 1) { - VALUE=$(1) - print VALUE - } - }' \ - ) - # If value is a number followed by letters, it is a value with units - # and needs to be converted. - # If value is a %, print the number and strip the value - # Otherwise just print value - if [[ "$VALUE" =~ [0-9.]+[%a-zA-Z]+ ]]; then - echo "$VALUE"\ - | sed -E 's/([0-9.]*)([^0-9.]*)/\1 \2/' \ - | awk '{ - UNIT=substr($2, 1, 1) - if(UNIT == "T") { - SCALE=2^10 - } else if(UNIT == "G") { - SCALE=1 - } else if(UNIT == "M") { - SCALE=2^-10 - } else if(UNIT == "K") { - SCALE=2^-20 - } else if(UNIT == "B") { - SCALE=2^-30 - } else { - SCALE=1 - } - printf "%.1f", $1 * SCALE - }' - else - echo "$VALUE" - fi -} - -function findBlockDevice() { - MOUNT_POINT=$1 - FILESYSTEM=$(grep -E "$MOUNT_POINT\s" /proc/self/mounts \ - | awk '{print $1}') - DEVICE_NAME=$(basename "$FILESYSTEM") - FS_IN_BLOCK=$(find -L /sys/block/ -mindepth 2 -maxdepth 2 -type d \ - -name "$DEVICE_NAME") - if [ -n "$FS_IN_BLOCK" ]; then - # found path to the filesystem in the block devices. 
get the - # block device as the parent dir - dirname "$FS_IN_BLOCK" - elif [ -d "/sys/block/$DEVICE_NAME" ]; then - # the device is itself a block device - echo "/sys/block/$DEVICE_NAME" - else - # couldn't find, possibly mounted by mapper. - # look for block device that is just the name of the symlinked - # original file. if not found, echo empty string (no device found) - BLOCK_DEVICE=$(ls -l "$FILESYSTEM" 2>/dev/null \ - | cut -d'>' -f2 \ - | xargs basename 2>/dev/null \ - || echo) - if [[ -z "$BLOCK_DEVICE" ]]; then - 1>&2 echo "Unable to find block device for filesystem $FILESYSTEM." - if [[ -d /sys/block/sdb ]] && ! grep -qE "^/dev/sdb" /etc/mtab; then - 1>&2 echo "Guessing present but unused sdb is the correct block device." - echo "/sys/block/sdb" - else - 1>&2 echo "Disk IO will not be monitored." - fi - fi - fi -} - -function handle_integer_wrap() { - if [ $1 -ge 0 ]; then - echo $1 - else - WRAPPED=$1 - echo "$((WRAPPED + 2**30))" - fi -} - -function getBlockDeviceIO() { - # get read and write IO rate by looking at appropriate block device - STAT_FILE="$1" - if [[ -f "$STAT_FILE" ]]; then - # get IO stats as comma-separated list to extract 3rd and 7th fields - STATS=$(sed -E 's/[[:space:]]+/,/g' $STAT_FILE | sed -E 's/^,//'\ - | cut -d, -f3,7 | sed -E 's/,/ /g') - # get results of previous poll - read OLD_READ OLD_WRITE < $TEMP_IO - # save new poll results - read READ_SECTORS WRITE_SECTORS <<<$STATS - echo "$READ_SECTORS $WRITE_SECTORS" > $TEMP_IO - # update read and write sectors as difference since previous poll - READ_SECTORS=$(handle_integer_wrap $((READ_SECTORS - OLD_READ))) - WRITE_SECTORS=$(handle_integer_wrap $((WRITE_SECTORS - OLD_WRITE))) - - # output change in read/write sectors in MiB/s - echo "$READ_SECTORS $WRITE_SECTORS" \ - | awk -v T=$SLEEP_TIME -v B=$SECTOR_BYTES \ - '{ printf "%.3f\t%.3f", $1*B/T/1048576, $2*B/T/1048576 }' - else - printf "nan\tnan" - fi -} - -T_START=$(date +%s) - -function elapsed_time() { - T_NOW=$(date +%s) - T_ELAPSED=$((T_NOW-T_START)) - M=$((T_ELAPSED / 60)) - S=$((T_ELAPSED % 60)) - H=$((M / 60)) - M=$((M % 60)) - printf "%02d:%02d:%02d" $H $M $S -} - -function runtimeInfo() { - printf "%s\t%s\t%s\t%s\t%s\t%s\n" "$(elapsed_time)" "$(getCpuUsage)" "$(getMemUnavailable)" \ - "$(getDisk Used $MONITOR_MOUNT_POINT)" "$(getDisk Use% $MONITOR_MOUNT_POINT)" \ - "$(getBlockDeviceIO "$BLOCK_DEVICE_STAT_FILE")" -} - - -# print out header info -echo --- General Information --- -echo Num processors: $(nproc) -echo Total Memory: $(getMem MemTotal) -echo Total Disk space: $(getDisk Size "$MONITOR_MOUNT_POINT") GiB -echo Start time: $(date "+%F %T %z" -d @$T_START) -echo --- Runtime Information --- -echo -e "ElapsedTime\tCPU\tMem\tMemPct\tDisk\tDiskPct\tIORead\tIOWrite" -echo -e "HH:MM:SS \t%\tGiB\t%\tGiB\t%\tMiB/s\tMiB/s" - -# make a temp file to store io information, remove it on exit -TEMP_IO=$(mktemp "${TMPDIR:-/tmp/}$(basename $0).XXXXXXXXXXXX") -# make a temp file to store cpu information, remove it on exit -# remove temp files on exit -TEMP_CPU=$(mktemp "${TMPDIR:-/tmp/}$(basename $0).XXXXXXXXXXXX") -trap "rm -f $TEMP_IO $TEMP_CPU" EXIT - - -# find the block device -BLOCK_DEVICE=$(findBlockDevice "$MONITOR_MOUNT_POINT") -if [[ -z "$BLOCK_DEVICE" ]] \ - || [[ ! 
-f "$BLOCK_DEVICE/queue/hw_sector_size" ]]; then - # no block device found, can't get IO info - SECTOR_BYTES=0 - BLOCK_DEVICE_STAT_FILE="" -else - SECTOR_BYTES=$(cat "$BLOCK_DEVICE/queue/hw_sector_size") - BLOCK_DEVICE_STAT_FILE="$BLOCK_DEVICE/stat" -fi - - -# since getCpuUsage looks at differences in stat file, run the update so -# the first reported update has a sensible previous result to compare to -echo "0 0" > $TEMP_CPU -getCpuUsage > /dev/null - -# same thing for getBlockDeviceIO -echo "0 0" > $TEMP_IO -getBlockDeviceIO "$BLOCK_DEVICE_STAT_FILE" > /dev/null - -# sleep for just long enough to avoid divide by zero errors in CPU% and IO calculations -sleep 0.001 - -while true; do - runtimeInfo - sleep $SLEEP_TIME -done diff --git a/scripts/cromwell/download_monitoring_logs.py b/scripts/cromwell/download_monitoring_logs.py deleted file mode 100644 index 17f704570..000000000 --- a/scripts/cromwell/download_monitoring_logs.py +++ /dev/null @@ -1,117 +0,0 @@ -#!/bin/python - -import json -import os.path -import argparse -from google.cloud import storage -from joblib import Parallel, delayed -import random - -# Synopsis: -# Downloads all Cromwell monitoring logs from a given workflow. Monitoring logs should be generated using -# cromwell_monitoring_script.sh. Log files are named with the format: -# ....monitoring.log -# Note downloaded logs will not be overwritten if they already exist. If you interrupt a download, make sure to -# delete any partially-downloaded files. Empty files are created for logs that could not be found in GCS. -# -# Usage: -# python download_monitoring_logs.py workflow_metadata.json /output/dir -# -# Parameters: -# workflow_metadata.json : Workflow metadata file -# /output/dir : Directory to place logs -# -# Author: Mark Walker (markw@broadinstitute.org) - -# Download threads -NUM_THREADS = 8 -RAND_SEED = 7282993 - - -def getCalls(m, alias=None): - if isinstance(m, list): - call_metadata = [] - for m_shard in m: - call_metadata.extend(getCalls(m_shard, alias=alias)) - return call_metadata - - if 'labels' in m: - if 'wdl-call-alias' in m['labels']: - alias = m['labels']['wdl-call-alias'] - elif 'wdl-task-name' in m['labels']: - alias = m['labels']['wdl-task-name'] - - shard_index = '-2' - if 'shardIndex' in m: - shard_index = m['shardIndex'] - - attempt = '0' - if 'attempt' in m: - attempt = m['attempt'] - - job_id = 'na' - if 'jobId' in m: - job_id = m['jobId'].split('/')[-1] - - call_metadata = [] - if 'calls' in m: - for call in m['calls']: - # Skips scatters that don't contain calls - if '.' not in call: - continue - call_alias = call.split('.')[1] - call_metadata.extend(getCalls(m['calls'][call], alias=call_alias)) - - if 'subWorkflowMetadata' in m: - call_metadata.extend(getCalls(m['subWorkflowMetadata'], alias=alias)) - - # in a call - if alias and ('monitoringLog' in m): - call_metadata.append((m, alias, shard_index, attempt, job_id)) - - return call_metadata - - -def download(data, output_dir): - (m, alias, shard_index, attempt, job_id) = data - if job_id != 'na': - output_dest = output_dir + '/' + alias + '.' + \ - str(shard_index) + '.' + str(attempt) + \ - '.' 
+ job_id + '.monitoring.log' - log_url = m['monitoringLog'] - if os.path.isfile(output_dest): - print("skipping " + log_url) - return - with open(output_dest, 'wb') as f: - client = storage.Client() - tokens = log_url.split('/') - bucket_name = tokens[2] - bucket_object = '/'.join(tokens[3:]) - bucket = client.get_bucket(bucket_name) - blob = bucket.get_blob(bucket_object) - if blob: - print(log_url) - blob.download_to_file(f) - -# Main function - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("workflow_metadata", - help="Workflow metadata JSON file") - parser.add_argument("output_dir", help="Output directory") - args = parser.parse_args() - random.seed(RAND_SEED) - - metadata_file = args.workflow_metadata - output_dir = args.output_dir - - metadata = json.load(open(metadata_file, 'r')) - call_metadata = getCalls(metadata, metadata['workflowName']) - Parallel(n_jobs=NUM_THREADS)(delayed(download)(d, output_dir) - for d in call_metadata) - - -if __name__ == "__main__": - main() diff --git a/scripts/cromwell/generate_inputs.py b/scripts/cromwell/generate_inputs.py deleted file mode 100644 index 735b6fefc..000000000 --- a/scripts/cromwell/generate_inputs.py +++ /dev/null @@ -1,381 +0,0 @@ -#!/bin/python - -import json -import argparse -import sys - -# Synopsis: -# - Generates a Cromwell input file for a given workflow -# - Determines the WDL Workflow and populates default parameters using an example input file (JSON) -# - Parses Cromwell metadata files generated by any preqrequisite steps to determine input values (replacing default values) -# - For workflows requiring sample-ordered list(s), the sample list is used to cross-check their order (assumes each file name contains the sample name) -# -# Usage: -# python generate_inputs.py workflow.wdl.example.json prereq_metadata_files.json -# -# Parameters: -# worfklow.wdl.example.json : Workflow input file containing all parameters. This is used to populate default values for parameters not determined from metadata. -# prereq_metadata_files.json : JSON-encoded set of prerequisite workflow metadata files (see generate_inputs_examples directory) -# -# Author: Mark Walker (markw@broadinstitute.org) - -# Prints error message and quits - - -def raise_error(msg): - raise ValueError(msg) - sys.exit(1) - -# Prints warning message to stderr - - -def print_warning(msg): - sys.stderr.write("Warning: " + msg) - -# Workflow-specific configuration class - - -class ScriptConfig: - def __init__(self, data_map, sample_ids_keys=None, sample_specific_file_lists=None): - self.data_map = data_map - self.sample_ids_keys = sample_ids_keys - self.sample_specific_file_lists = sample_specific_file_lists - - def requires_sample_ids(self): - if self.sample_ids_keys: - return True - return False - -# Definitions of prerequisite workflows and mappings from their inputs/outputs to the current workflow's input -# i.e. 
X_MAP[PREREQ_WORKFLOW][INPUT/OUTPUT][PREREQ_OUTPUT] = X_INPUT - - -# TODO : add gCNV -GATKSVPIPELINEPHASE1_MAP = { - "Module00a": { - "inputs": { - "samples": "samples" - }, - "outputs": { - "BAF_out": "BAF_files", - "coverage_counts": "counts", - "manta_vcf": "manta_vcfs", - "melt_vcf": "melt_vcfs", - "pesr_disc": "PE_files", - "pesr_split": "SR_files", - "wham_vcf": "wham_vcfs" - } - } -} - -MODULE00B_MAP = { - "Module00a": { - "inputs": { - "samples": "samples" - }, - "outputs": { - "coverage_counts": "counts", - "manta_vcf": "manta_vcfs", - "melt_vcf": "melt_vcfs", - "wham_vcf": "wham_vcfs" - } - } -} - -MODULE00C_MAP = { - "Module00a": { - "inputs": { - "samples": "samples" - }, - "outputs": { - "BAF_out": "BAF_files", - "coverage_counts": "counts", - "manta_vcf": "manta_vcfs", - "melt_vcf": "melt_vcfs", - "pesr_disc": "PE_files", - "pesr_split": "SR_files", - "wham_vcf": "wham_vcfs" - } - } -} - -MODULE01_MAP = { - "Module00c": { - "inputs": { - "batch": "batch" - }, - "outputs": { - "std_manta_vcf": "manta_vcfs", - "std_melt_vcf": "melt_vcfs", - "std_wham_vcf": "wham_vcfs", - "merged_dels": "del_bed", - "merged_dups": "dup_bed" - } - } -} - -MODULE02_MAP = { - "Module00c": { - "inputs": { - "samples": "samples", - "batch": "batch" - }, - "outputs": { - "merged_BAF": "baf_metrics", - "merged_SR": "splitfile", - "merged_PE": "discfile", - "merged_bincov": "coveragefile", - "median_cov": "medianfile" - } - }, - "Module01": { - "outputs": { - "depth_vcf": "depth_vcf", - "manta_vcf": "manta_vcf", - "wham_vcf": "wham_vcf", - "melt_vcf": "melt_vcf" - } - } -} - -MODULE03_MAP = { - "Module01": { - "inputs": { - "samples": "samples", - "batch": "batch" - }, - "outputs": { - "depth_vcf": "depth_vcf", - "manta_vcf": "manta_vcf", - "wham_vcf": "wham_vcf", - "melt_vcf": "melt_vcf" - } - }, - "Module02": { - "outputs": { - "metrics": "evidence_metrics" - } - } -} - -MODULE04_MAP = { - "Module00c": { - "inputs": { - "batch": "batch" - }, - "outputs": { - "merged_SR": "splitfile", - "merged_PE": "discfile", - "merged_bincov": "coveragefile", - "median_cov": "medianfile" - } - }, - "Module03": { - "outputs": { - "filtered_depth_vcf": "batch_depth_vcf", - "filtered_pesr_vcf": "batch_pesr_vcf", - "ped_file_postOutlierExclusion": "famfile", - "batch_samples_postOutlierExclusion": "samples", - "cutoffs": "rf_cutoffs" - } - } -} - -SCRIPT_CONFIGS = { - "GATKSVPipelinePhase1": ScriptConfig(GATKSVPIPELINEPHASE1_MAP, - sample_ids_keys=( - "Module00a", "inputs", "samples"), - sample_specific_file_lists=["BAF_files", "PE_files", "SR_files", "counts", "genotyped_segments_vcfs", "manta_vcfs", "melt_vcfs", "wham_vcfs"]), - "Module00b": ScriptConfig(MODULE00B_MAP, - sample_ids_keys=( - "Module00a", "inputs", "samples"), - sample_specific_file_lists=["counts", "manta_vcfs", "melt_vcfs", "wham_vcfs"]), - "Module00c": ScriptConfig(MODULE00C_MAP, - sample_specific_file_lists=["BAF_files", "PE_files", "SR_files", "counts", "manta_vcfs", "melt_vcfs", "wham_vcfs"]), - "Module01": ScriptConfig(MODULE01_MAP, - sample_ids_keys=( - "Module00c", "inputs", "samples"), - sample_specific_file_lists=["manta_vcfs", "melt_vcfs", "wham_vcfs"]), - "Module02": ScriptConfig(MODULE02_MAP, - sample_ids_keys=("Module00c", "inputs", "samples")), - "Module03": ScriptConfig(MODULE03_MAP, - sample_ids_keys=("Module01", "inputs", "samples")), - # No sample order checking post-exclusion - "Module04": ScriptConfig(MODULE04_MAP) -} - - -def load_json(filepath): - with open(filepath, 'r') as f: - return json.load(f) - return - - -def 
determine_workflow_name(default_inputs): - workflow_name = "" - for key in default_inputs: - if '.' not in key: - raise_error('Missing "." in WDL input field: ' + key) - tokens = key.split('.') - if not workflow_name: - workflow_name = tokens[0] - else: - if tokens[0] != workflow_name: - raise_error('Inconsistent workflow name: ' + tokens[0]) - if not workflow_name: - raise_error( - 'Workflow name could not be determined from the WDL input file') - return workflow_name - - -def get_workflow_config(workflow_name): - if workflow_name not in SCRIPT_CONFIGS: - raise_error('Could not find workflow "' + workflow_name + - '", options are: ' + str(SCRIPT_CONFIGS.keys())) - return SCRIPT_CONFIGS[workflow_name] - - -def check_all_metadata_present(script_config, metadata_files): - if script_config.data_map.keys() != metadata_files.keys(): - raise_error('Script config workflows and metadata file workflows did not match. Script config expected ' + - str(workflows) + ' but got metadata for ' + str(metadata_files.keys())) - - -def check_expected_workflow_fields(script_config, default_inputs, workflow_name): - for workflow in script_config.data_map: - if "outputs" in script_config.data_map[workflow]: - for output_name in script_config.data_map[workflow]["outputs"]: - wdl_input_name = workflow_name + "." + \ - script_config.data_map[workflow]["outputs"][output_name] - if wdl_input_name not in default_inputs: - raise_error('Script configuration expected field ' + - wdl_input_name + ' but it was not found in the WDL input file') - - -def load_prerequisite_metadata(metadata_files): - prereq_metadata = {} - for workflow in metadata_files: - with open(metadata_files[workflow], 'r') as f: - m = json.load(f) - if 'outputs' not in m: - raise_error( - 'Metadata ' + metadata_files[workflow] + ' did not have an outputs field') - prereq_metadata[workflow] = m - return prereq_metadata - - -def get_preqreq_values(workflow_map, workflow_metadata, script_config, prereq_attr_prefix, workflow_name, inputs): - for expected_name in workflow_map: - name = prereq_attr_prefix + expected_name - if name not in workflow_metadata or not workflow_metadata[name]: - print_warning('could not find metadata for attribute ' + - name + ', using default value if provided\n') - else: - input_name = workflow_name + "." 
+ workflow_map[expected_name] - inputs[input_name] = workflow_metadata[name] - - -def get_workflow_inputs(prereq_metadata, script_config, default_inputs, workflow_name): - inputs = {} - for prereq_workflow_name in script_config.data_map: - workflow_metadata = prereq_metadata[prereq_workflow_name] - data_maps = script_config.data_map[prereq_workflow_name] - if "inputs" in data_maps: - get_preqreq_values( - data_maps["inputs"], workflow_metadata["inputs"], script_config, "", workflow_name, inputs) - if "outputs" in data_maps: - get_preqreq_values(data_maps["outputs"], workflow_metadata["outputs"], - script_config, prereq_workflow_name + ".", workflow_name, inputs) - # Fill in rest of the fields with defaults inputs file - for key in default_inputs: - if key not in inputs: - inputs[key] = default_inputs[key] - return inputs - - -def get_samples_list(script_config, prereq_metadata): - samples_workflow = script_config.sample_ids_keys[0] - samples_workflow_metadata_key = script_config.sample_ids_keys[1] - samples_attr = script_config.sample_ids_keys[2] - if samples_workflow not in prereq_metadata: - raise_error("Expected metadata for workflow " + samples_workflow) - if samples_workflow_metadata_key not in prereq_metadata[samples_workflow]: - raise_error("Expected to find key " + samples_workflow_metadata_key + " in workflow " + - samples_workflow + " metadata, but found: " + str(prereq_metadata[samples_workflow].keys())) - if samples_attr not in prereq_metadata[samples_workflow][samples_workflow_metadata_key]: - raise_error("Expected to find attribute " + samples_workflow_metadata_key + - " : { " + samples_attr + " } in workflow " + samples_workflow) - return prereq_metadata[samples_workflow][samples_workflow_metadata_key][samples_attr] - - -def cross_check_sample_order(workflow_name, script_config, inputs, samples_list): - for sample_specific_name in [workflow_name + "." 
+ name for name in script_config.sample_specific_file_lists]: - if sample_specific_name not in inputs: - raise_error( - 'Expected to find sample-specific parameter list ' + sample_specific_name) - sample_specific_values = inputs[sample_specific_name] - if not isinstance(sample_specific_values, list): - raise_error('Expected sample-specific value ' + sample_specific_name + - ' to be of type list but found ' + str(type(sample_specific_values))) - if len(sample_specific_values) != len(samples_list): - print_warning('Length of samples list is ' + str(len(samples_list)) + - ' but length of sample-specific parameter ' + sample_specific_name + ' was ' + str(len(sample_specific_values))) - for i in range(len(samples_list)): - sample_id = samples_list[i] - if sample_id not in sample_specific_values[i]: - print_warning('Did not find sample id ' + sample_id + ' in input ' + - sample_specific_name + '[' + str(i) + '], found ' + str(sample_specific_values[i])) - -# Main function - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument( - "default_inputs", help="Inputs JSON file containing default parameter values") - parser.add_argument("prereq_workflow_paths", - help="JSON file specifying metadata file paths for each prerequisite workflow with format \"workflow_name\" : \"/path/to/metadata\"") - args = parser.parse_args() - - # Load the inputs file for the workflow - default_inputs = load_json(args.default_inputs) - - # Load preqreq metadata file paths, if provided - metadata_files = load_json(args.prereq_workflow_paths) - - # Determine name of the current workflow - workflow_name = determine_workflow_name(default_inputs) - - # Check that workflow is defined and retrieve it - script_config = get_workflow_config(workflow_name) - - # Check that the expected prerequisite workflow metadata files were provided - check_all_metadata_present(script_config, metadata_files) - - # Check that all script config fields are present in the WDL inputs file - # check_expected_workflow_fields(script_config, default_inputs, workflow_name) - - # Load prerequisite metadata outputs - prereq_metadata = load_prerequisite_metadata(metadata_files) - - # Map metadata outputs to workflow inputs and fill in default values - inputs = get_workflow_inputs( - prereq_metadata, script_config, default_inputs, workflow_name) - - # Use samples list if provided - if script_config.requires_sample_ids(): - samples_attr = script_config.sample_ids_keys[2] - samples_name = workflow_name + "." + samples_attr - inputs[samples_name] = get_samples_list(script_config, prereq_metadata) - samples_list = inputs[samples_name] - # Checks that sample-specific lists contain sample ids in correct order - if script_config.sample_specific_file_lists: - cross_check_sample_order( - workflow_name, script_config, inputs, samples_list) - - # Print output - print json.dumps(inputs, sort_keys=True, indent=2) - - -if __name__ == "__main__": - main() diff --git a/scripts/cromwell/get_cromwell_resource_usage.sh b/scripts/cromwell/get_cromwell_resource_usage.sh deleted file mode 100755 index 640fc6b34..000000000 --- a/scripts/cromwell/get_cromwell_resource_usage.sh +++ /dev/null @@ -1,282 +0,0 @@ -#!/bin/bash -# USAGE: get_cromwell_memory_usage.sh WORKFLOW_ID -# or -# get_cromwell_memory_usage.sh GCS_PATH_TO_WORKFLOW_FOLDER -# Displays #shards x #fields table of memory usage info with one header -# line. -# -If you pass a workflow id, cromshell will be used to find the -# appropriate workflow folder in google cloud storage. 
-# -If cromshell is not located on your PATH, then define the ENV -# variable CROMSHELL with the appropriate path. -# -This script works by finding all the logs, sorting them into sensible -# order, and chopping up their paths to make a description column. If -# any jobs have not completed they will simply be omitted. The script -# makes no attempt to figure out what tasks *should* have run. However, -# the description field should make any such omissions discoverable. -# -Since there is significant start-up time to running gsutil functions, -# running the inner loop of this script in parallel results in -# signficant speed-up. Installing parallel (available on osx and linux) -# triggers this automatically. - -set -Eeu -o pipefail - -CROMSHELL=${CROMSHELL:-"cromshell"} - -WORKFLOW_INFO="$1" -if [[ $WORKFLOW_INFO == "gs://"* ]]; then - WORKFLOW_DIR="$WORKFLOW_INFO" -else - WORKFLOW_ID="$WORKFLOW_INFO" - # get the metadata for this workflow id - METADATA=$(2>/dev/null "$CROMSHELL" metadata $WORKFLOW_ID) - # get the appropriate path in google cloud for the workflow dir - # from the metadata - # a) find lines in the metadata that include a - WORKFLOW_DIR=$( \ - echo "$METADATA" \ - | grep -o "gs://[^\S]*$WORKFLOW_ID" \ - | tail -n1 \ - ) -fi -1>&2 echo "WORKFLOW_DIR=$WORKFLOW_DIR" - - -function get_monitor_logs() { - TOP_DIR=$1 - gsutil -m ls "$TOP_DIR/**monitoring.log" 2>/dev/null || echo "" -} - - -# ingest LOG_FILE, TOP_DIR, NUM_SHARDS -# print out sorting key of form ATTEMPT_KEY -tab- MAIN_KEY -# where ATTEMPT_KEY -# -lists the attempt number of the executing tasks OR -# -if there is no attempt in the path, calls it attempt 0 -# -digits are padded the same as shards -# where MAIN_KEY -# -preserves info about calls and shard numbers, each separated -# by '/' -# -shard numbers having enough 0-padded digts to all be of -# the same length -function get_task_sort_key() { - LOG_FILE=$1 - TOP_DIR=$2 - N_START=$((1 + $(echo "$TOP_DIR" | tr / '\n' | wc -l))) - NUM_SHARDS=$(($3)) - MAX_SHARD_DIGITS=${#NUM_SHARDS} - SHARD_FORMAT="%0${MAX_SHARD_DIGITS}d" - # keep info about task calls, shards, and attempts below top-dir - # if there is no preemption folder in the path, call it attempt 0 - echo "$LOG_FILE" \ - | tr / '\n' \ - | tail -n+$N_START \ - | awk -v FS='-' \ - -v SHARD_FORMAT="$SHARD_FORMAT" ' { - if($1 == "shard") { - SHARD_KEY=sprintf("%s/" SHARD_FORMAT, SHARD_KEY, $2) - } else if($1 == "call") { - CALL_KEY=sprintf("%s/%s", CALL_KEY, $2) - } else if($1 == "attempt") { - ATTEMPT_NUMBER=$2 - } - } END { - printf SHARD_FORMAT "\t%s/%s", ATTEMPT_NUMBER, CALL_KEY, SHARD_KEY - }' -} - - - -function sort_monitor_logs() { - TOP_DIR="$1" - LOGS_LIST=$(cat) - NUM_LOGS=$(($(echo "$LOGS_LIST" | wc -l))) - # The older bash on OSX does not have associative arrays, so to - # sort file names according to a key, we join the key and the file - # name into one string with tab delimiters (okay because these are - # cloud paths produced by cromwell and have no tabs). Then sort by - # the key, and ultimately cut away the key. There is one extra - # complication that there may be multiple "attempts" at each task, - # and we only want to keep the final (presumably successful) - # attempt. - # - # 1. for each log file - # a) get a sort key of form: ATTEMPT_KEY tab MAIN_KEY - # b) print line of form: LOG_FILE tab MAIN_KEY tab ATTEMPT_KEY - # 2. sort lines by increasing MAIN_KEY, and secondarily by - # decreasing (numeric) ATTEMPT_KEY - # 3. keep first unique instance of MAIN_KEY (i.e. the last attempt) - # 4. 
print out the log file (the first field) in sorted order - echo "$LOGS_LIST" \ - | while read LOG_FILE; do - SORT_KEY=$(get_task_sort_key "$LOG_FILE" "$TOP_DIR" "$NUM_LOGS") - printf "%s\t%s\n" "$LOG_FILE" "$SORT_KEY" - done \ - | sort -t $'\t' -k3,3 -k2,2rn \ - | uniq -f2 \ - | cut -d$'\t' -f1 -} - - -function date_stamp_to_seconds() { - if [ "$(uname)" == "Darwin" ]; then - date -j -u -f "%a %b %d %T %Z %Y" "$1" "+%s" - else - date -d "$1" "+%s" - fi -} -export -f date_stamp_to_seconds - - -function get_task_peak_resource_usage() { - LOG_FILE=$1 - - gsutil cat "$LOG_FILE" \ - | awk -v OFS='\t' ' - /^\[.*\]$/ { - DATE_STR=substr($0,2,length($0)-2) - if(MIN_TIME == "") { - MIN_TIME=DATE_STR - } - else { - MAX_TIME=DATE_STR - } - } - $1 == "*" { - if($2 == "Memory") { - if($4 > PEAK_MEM) { - PEAK_MEM = $4 - } - } else if($2 == "CPU") { - if($4 > PEAK_CPU) { - PEAK_CPU = $4 - } - } else if($2 == "Disk") { - DISK=$4 - LEN=length(DISK) - UNIT=substr(DISK, LEN) - if(UNIT ~ /[A-Z]/) { - if(UNIT == "T") { - SCALE=2^10 - } else if(UNIT == "G") { - SCALE=1 - } else if(UNIT == "M") { - SCALE=2^-10 - } else if(UNIT == "K") { - SCALE=2^-20 - } else if(UNIT == "B") { - SCALE=2^-30 - } else { - SCALE=1 - } - DISK_VAL=substr(DISK, 0, LEN-1) * SCALE - } else { - DISK_VAL=DISK - } - if(DISK_VAL > PEAK_DISK) { - PEAK_DISK=DISK_VAL - } - } else if($2 == "Read/Write") { - if($4 > PEAK_READ || PEAK_READ == "") { - PEAK_READ=$4 - } - if($6 > PEAK_WRITE || PEAK_WRITE == "") { - PEAK_WRITE=$6 - } - } - } END { - if(PEAK_MEM == "") { - PEAK_MEM="nan" - } - if(PEAK_DISK == "") { - PEAK_DISK="nan" - } - if(PEAK_CPU == "") { - PEAK_CPU="nan" - } - if(PEAK_READ == "" || PEAK_READ == "N/A") { - PEAK_READ="nan" - } - if(PEAK_WRITE == "" || PEAK_WRITE == "N/A") { - PEAK_WRITE="nan" - } - if(MAX_TIME == "") { - MIN_TIME = "Thu Jan 1 00:00:00 UTC 1970" - MAX_TIME = MIN_TIME - } - print PEAK_MEM, PEAK_DISK, PEAK_CPU, PEAK_READ, PEAK_WRITE - print MIN_TIME - print MAX_TIME - }' -} -export -f get_task_peak_resource_usage - - -function get_task_description() { - LOG_FILE=$1 - if [ $# -ge 2 ]; then - TOP_DIR=$2 - N_START=$((1 + $(echo "$TOP_DIR" | tr / '\n' | wc -l))) - else - N_START=1 - fi - # keep info about task calls and shards below top-dir - echo "$LOG_FILE" \ - | tr / '\n' \ - | tail -n+$N_START \ - | grep -E "^(call-|shard-|attempt-)" \ - | tr '\n' / \ - | sed -e 's/call-//g' -e 's,/$,,' -} -export -f get_task_description - - -function get_task_columns() { - LOG_FILE="$1" - TOP_DIR="$2" - DESCRIPTION=$(get_task_description "$LOG_FILE" "$TOP_DIR") - RESOURCE_USAGE=$(get_task_peak_resource_usage "$LOG_FILE") - get_task_peak_resource_usage "$LOG_FILE" | { - read RESOURCE_USAGE - read MIN_TIME - read MAX_TIME - MIN_SECONDS=$(date_stamp_to_seconds "$MIN_TIME") - MAX_SECONDS=$(date_stamp_to_seconds "$MAX_TIME") - awk -v DESCRIPTION="$DESCRIPTION" \ - -v RESOURCE_USAGE="$RESOURCE_USAGE" \ - -v MIN_SECONDS=$MIN_SECONDS \ - -v MAX_SECONDS=$MAX_SECONDS \ - 'END { - printf "%s\t%.3f\t%s\n", RESOURCE_USAGE, (MAX_SECONDS-MIN_SECONDS)/60/60, DESCRIPTION - }' /dev/null - } -} -export -f get_task_columns - - -function get_workflow_peak_resource_usage() { - export TOP_DIR=$1 - echo -e "mem_GiB\tdisk_GiB\tcpu_%\tread_MiB/s\twrite_MiB/s\truntime_Hours\ttask_description" - LOGS=$(get_monitor_logs "$TOP_DIR" | sort_monitor_logs "$TOP_DIR") - if [ -z "$LOGS" ]; then - 1>&2 echo "No logs found in $TOP_DIR" - exit 0 - fi - if command -v parallel > /dev/null; then - # parallel command is installed, use it, much faster! 
- if [ -t 1 ]; then - # stdout is a terminal, not being redirected, don't use bar - BAR="" - else - # being redirected, show progress via bar to stderr - BAR="--bar" - fi - echo "$LOGS" | parallel ${BAR} --env TOP_DIR -k "get_task_columns {} $TOP_DIR" - else - 1>&2 echo "Consider installing 'parallel', it will give significant speed-up" - echo "$LOGS" | while read WORKFLOW_LOG; do - get_task_columns "$WORKFLOW_LOG" "$TOP_DIR" - done - fi -} - -get_workflow_peak_resource_usage "$WORKFLOW_DIR" diff --git a/scripts/cromwell/get_cromwell_resource_usage2.sh b/scripts/cromwell/get_cromwell_resource_usage2.sh deleted file mode 100755 index ce4e997ad..000000000 --- a/scripts/cromwell/get_cromwell_resource_usage2.sh +++ /dev/null @@ -1,366 +0,0 @@ -#!/bin/bash - -function show_help() { - cat <<-END -USAGE: get_cromwell_memory_usage2.sh [OPTIONS] WORKFLOW_INFO -Displays #tasks x #fields table of resource usage info with two -header lines, and additional column of task descriptions. - WORKFLOW_INFO: specifies workflow identity. Can be - a) a cromwell workflow ID - b) a path to workflow output in google cloud (starting with gs://) - c) a local path to workflow output - OPTIONS: - -r --raw-output - If set, output data as tab-separated table. Otherwise pass data through column -t for - easier reading. - -u --no-units - If set, don't show second header line with units. - -o --output-file OUTPUT_FILE_NAME - If specified, write output to file instead of stdout. Note that all non-tabular output is - sent to stderr, so this is just syntactic sugar for file redirection. - --This script works by finding all the logs, sorting them into sensible - order, and chopping up their paths to make a description column. If - any jobs have not completed they will simply be omitted. The script - makes no attempt to figure out what tasks *should* have run. However, - the description field should make any such omissions discoverable. --If you run on a local path, the log file names must still be - "monitoring.log", and the local folder structure must be the same as - in the original cloud bucket (other non-log files are not required - though) --If you pass a workflow id, cromshell will be used to find the - appropriate workflow folder in google cloud storage. --If cromshell is not located on your PATH, then define the ENV - variable CROMSHELL with the appropriate path. --Since there is significant start-up time to running gsutil functions, - running the inner loop of this script in parallel results in - signficant speed-up. Installing gnu parallel (available on osx and - linux) triggers this automatically. 
-END -} - -if [[ $# == 0 ]]; then - show_help - exit 0 -fi -RAW_OUTPUT=false -SHOW_UNITS=true -OUTPUT_FILE="/dev/stdout" -WORKFLOW_INFO="" -for ((i=1; i<=$#; ++i)); do - if [[ ${!i} =~ ^-+(h|help) ]]; then - show_help - exit 0 - elif [[ ${!i} =~ ^-+(r|raw-output) ]]; then - RAW_OUTPUT=true - elif [[ ${!i} =~ ^-+(u|no-units) ]]; then - SHOW_UNITS=false - elif [[ ${!i} =~ ^-+(o|output-file) ]]; then - ((++i)) - OUTPUT_FILE="${!i}" - elif [[ ${!i} =~ ^-.* ]]; then - 1>&2 echo "Unknown option ${!i}" - show_help - exit 1 - elif [[ -z "$WORKFLOW_INFO" ]]; then - WORKFLOW_INFO=${!i} - else - 1>&2 echo "Too many arguments" - exit 1 - fi -done -export SHOW_UNITS - -set -Eeu -o pipefail - -CROMSHELL=${CROMSHELL:-"cromshell"} - -REMOTE=true -if [[ -z "$WORKFLOW_INFO" ]]; then - 1>&2 echo "No WORKFLOW_INFO provided" - show_help - exit 1 -elif [[ $WORKFLOW_INFO == "gs://"* ]]; then - # workflow info is a cloud file, strip trailing slashes (must use sed because OSX uses old bash) - WORKFLOW_DIR=$(echo "$WORKFLOW_INFO" | sed 's,/*$,,g') -elif [[ -d "$WORKFLOW_INFO" ]]; then - # workflow info is a local file, strip trailing slashes (must use sed because OSX uses old bash) - WORKFLOW_DIR=$(echo "$WORKFLOW_INFO" | sed 's,/*$,,g') - REMOTE=false -else - WORKFLOW_ID="$WORKFLOW_INFO" - # get the metadata for this workflow id - if ! METADATA=$(2>/dev/null $CROMSHELL -t 60 slim-metadata $WORKFLOW_ID); then - 1>&2 echo "Unable to obtain workflow $WORKFLOW_ID metadata from cromshell, try supplying GCS_PATH_TO_WORKFLOW_FOLDER" - exit 1 - fi - # get the appropriate path in google cloud for the workflow dir - # from the metadata - # a) find lines in the metadata that include a - WORKFLOW_DIR=$( \ - echo "$METADATA" \ - | grep -Eo "gs://[^[:space:]]*$WORKFLOW_ID" \ - | tail -n1 \ - ) -fi -1>&2 echo "WORKFLOW_DIR=$WORKFLOW_DIR" - - -function get_monitor_logs() { - TOP_DIR=$1 - REMOTE=$2 - if $REMOTE; then - gsutil -m ls "$TOP_DIR/**monitoring.log" 2>/dev/null || echo "" - else - find "$TOP_DIR" -name "monitoring.log" 2>/dev/null || echo "" - fi -} - - -# ingest LOG_FILE, TOP_DIR, NUM_SHARDS -# print out sorting key of form ATTEMPT_KEY -tab- MAIN_KEY -# where ATTEMPT_KEY -# -lists the attempt number of the executing tasks OR -# -if there is no attempt in the path, calls it attempt 0 -# -digits are padded the same as shards -# where MAIN_KEY -# -preserves info about calls and shard numbers, each separated -# by '/' -# -shard numbers having enough 0-padded digts to all be of -# the same length -function get_task_sort_key() { - LOG_FILE=$1 - TOP_DIR=$2 - N_START=$((1 + $(echo "$TOP_DIR" | tr / '\n' | wc -l))) - NUM_SHARDS=$(($3)) - MAX_SHARD_DIGITS=${#NUM_SHARDS} - SHARD_FORMAT="%0${MAX_SHARD_DIGITS}d" - # keep info about task calls, shards, and attempts below top-dir - # if there is no preemption folder in the path, call it attempt 0 - echo "$LOG_FILE" \ - | tr / '\n' \ - | tail -n+$N_START \ - | awk -v FS='-' \ - -v SHARD_FORMAT="$SHARD_FORMAT" ' - { - if($1 == "shard") { - SHARD_KEY=sprintf("%s/" SHARD_FORMAT, SHARD_KEY, $2) - } else if($1 == "call") { - CALL_KEY=sprintf("%s/%s", CALL_KEY, $2) - } else if($1 == "attempt") { - ATTEMPT_NUMBER=$2 - } - } - END { - printf SHARD_FORMAT "\t%s/%s", ATTEMPT_NUMBER, CALL_KEY, SHARD_KEY - }' -} - - -function sort_monitor_logs() { - TOP_DIR="$1" - LOGS_LIST=$(cat) - NUM_LOGS=$(($(echo "$LOGS_LIST" | wc -l))) - # The older bash on OSX does not have associative arrays, so to - # sort file names according to a key, we join the key and the file - # name into one string with tab 
delimiters (okay because these are - # cloud paths produced by cromwell and have no tabs). Then sort by - # the key, and ultimately cut away the key. There is one extra - # complication that there may be multiple "attempts" at each task, - # and we only want to keep the final (presumably successful) - # attempt. - # - # 1. for each log file - # a) get a sort key of form: ATTEMPT_KEY tab MAIN_KEY - # b) print line of form: LOG_FILE tab MAIN_KEY tab ATTEMPT_KEY - # 2. sort lines by increasing MAIN_KEY, and secondarily by - # decreasing (numeric) ATTEMPT_KEY - # 3. keep first unique instance of MAIN_KEY (i.e. the last attempt) - # 4. print out the log file (the first field) in sorted order - echo "$LOGS_LIST" \ - | while read -r LOG_FILE; do - SORT_KEY=$(get_task_sort_key "$LOG_FILE" "$TOP_DIR" "$NUM_LOGS") - printf "%s\t%s\n" "$LOG_FILE" "$SORT_KEY" - done \ - | sort -t $'\t' -k3,3 -k2,2rn \ - | uniq -f2 \ - | cut -d$'\t' -f1 -} - - -# Scan LOG_FILE, extract header, and print maximum of each column. -# If a column is missing data, print "nan" -function get_task_peak_resource_usage() { - LOG_FILE=$1 - if $REMOTE; then - gsutil cat "$LOG_FILE" - else - cat "$LOG_FILE" - fi \ - | awk ' - function handle_nan(num) { - return num < 0 ? "nan" : num - } - BEGIN { - NEED_HEADER=2 - } - NEED_HEADER == 0 { - split($0, WORDS, /\t/) - PEAK_VALUE[1] = WORDS[1] - for(i=2; i<=length(WORDS); ++i) { - WORD=WORDS[i] - if(length(WORD) > 0 && WORD > PEAK_VALUE[i]) { - PEAK_VALUE[i] = WORD - } - } - } - NEED_HEADER>0 { - if(NEED_HEADER==2) { - if($1" "$2 == "Num processors:") { - TOT["CPU"] = $3 - TOT_NAME["CPU"] = "nCPU" - TOT_UNIT["CPU"] = "#" - } - else if($1" "$2 == "Total Memory:") { - TOT["Mem"] = $3 - TOT_NAME["Mem"] = "TotMem" - TOT_UNIT["Mem"] = $4 - } - else if($1" "$2 == "Total Disk") { - TOT["Disk"] = $4 - TOT_NAME["Disk"] = "TotDisk" - TOT_UNIT["Disk"] = $5 - } - else if($1 == "ElapsedTime") { - # this is the first header line - # for summary purposes, augment instantaneous - # usage with total VM stats - PEAK_VALUE[1] = "00:00:00" - HEADER_NAME[1] = $1 - printf "%s", $1 - for(i=2; i<=NF; ++i) { - PEAK_VALUE[i] = -1.0 - HEADER_NAME[i] = $i - if($i in TOT_NAME) { - printf "\t%s", TOT_NAME[$i] - delete TOT_NAME[$i] - } - printf "\t%s", $i - } - printf "\n" - --NEED_HEADER - } - } else { - printf "%s", $1 - for(i=2; i<=NF; ++i) { - NAME_i = HEADER_NAME[i] - if(NAME_i in TOT_UNIT) { - printf "\t%s", TOT_UNIT[NAME_i] - delete TOT_UNIT[NAME_i] - } - printf "\t%s", $i - } - printf "\n" - --NEED_HEADER - } - } - END { - printf "%s", handle_nan(PEAK_VALUE[1]) - for(i=2; i<=length(PEAK_VALUE); ++i) { - NAME_i = HEADER_NAME[i] - if(NAME_i in TOT) { - printf "\t%s", handle_nan(TOT[NAME_i]) - delete TOT[NAME_i] - } - printf "\t%s", handle_nan(PEAK_VALUE[i]) - } - printf "\n" - } - ' -} -export -f get_task_peak_resource_usage - - -# Condense directory structure of full path to LOG_FILE into a succinct -# description of the task. Ignore components above TOP_DIR, as they are -# common to all the log files that are being requested. 
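For illustration, a minimal Python sketch of the idea the function defined just below implements with awk/sed: keep only the call-/shard-/attempt- path components under the workflow directory and strip the "call-" prefix. The path used here is hypothetical.

    # hypothetical cromwell-style monitoring.log path
    path = "gs://bucket/Workflow/1234/call-GatherSampleEvidence/shard-7/attempt-2/monitoring.log"
    parts = [p for p in path.split("/") if p.startswith(("call-", "shard-", "attempt-"))]
    description = "/".join(p.replace("call-", "") for p in parts)
    print(description)  # GatherSampleEvidence/shard-7/attempt-2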
-function get_task_description() { - LOG_FILE=$1 - if [ $# -ge 2 ]; then - TOP_DIR=$2 - N_START=$((1 + $(echo "$TOP_DIR" | tr / '\n' | wc -l))) - else - N_START=1 - fi - # keep info about task calls and shards below top-dir - echo "$LOG_FILE" \ - | tr / '\n' \ - | tail -n+$N_START \ - | grep -E "^(call-|shard-|attempt-)" \ - | tr '\n' / \ - | sed -e 's/call-//g' -e 's,/$,,' -} -export -f get_task_description - -function get_task_columns() { - LOG_FILE="$1" - TOP_DIR="$2" - RESOURCE_USAGE=$(get_task_peak_resource_usage "$LOG_FILE") - if [[ -n "$RESOURCE_USAGE" ]]; then - DESCRIPTION=$(get_task_description "$LOG_FILE" "$TOP_DIR") - - # due to OSX having an ancient version of bash, this produces syntax errors: - # paste <(echo "$RESOURCE_USAGE" | head -n2) <(echo "task") - printf "%s\ttask\n" "$(echo "$RESOURCE_USAGE" | head -n1)" - if $SHOW_UNITS; then - echo "$RESOURCE_USAGE" | tail -n2 | head -n1 - fi - printf "%s\t%s\n" "$(echo "$RESOURCE_USAGE" | tail -n1)" "$DESCRIPTION" - fi -} -export -f get_task_columns - - -function get_workflow_peak_resource_usage() { - export TOP_DIR=$1 - export REMOTE=$2 - LOGS=$(get_monitor_logs "$TOP_DIR" $REMOTE | sort_monitor_logs "$TOP_DIR") - if [ -z "$LOGS" ]; then - 1>&2 echo "No logs found in $TOP_DIR" - exit 0 - fi - if command -v parallel > /dev/null; then - # parallel command is installed, use it, much faster! - if [ -t 1 ]; then - # stdout is a terminal, not being redirected, don't use bar - BAR="" - else - # being redirected, show progress via bar to stderr - #BAR="--bar" - # NOTE: keeping above line for now to see if I can find a way - # to make it work, but it looks like --bar may be incompatible - # with filtering out potentially empty results from logs that - # were truncated before the header line - BAR="" - fi - - echo "$LOGS" \ - | parallel ${BAR} --env TOP_DIR -k --colsep $'\t' "get_task_columns {1} $TOP_DIR" - else - 1>&2 echo "Consider installing 'parallel', it will give significant speed-up" - echo "$LOGS" | while read -r WORKFLOW_LOG; do - get_task_columns "$WORKFLOW_LOG" "$TOP_DIR" - done - fi \ - | if $SHOW_UNITS; then - awk 'FNR < 3 || FNR%3 == 0 { print $0 }' - else - awk 'FNR < 2 || FNR%2 == 0 { print $0 }' - fi -} - -if $RAW_OUTPUT; then - get_workflow_peak_resource_usage "$WORKFLOW_DIR" $REMOTE -else - get_workflow_peak_resource_usage "$WORKFLOW_DIR" $REMOTE | column -t -fi > "$OUTPUT_FILE" diff --git a/scripts/cromwell/get_inputs_outputs.py b/scripts/cromwell/get_inputs_outputs.py deleted file mode 100644 index a30a49b3a..000000000 --- a/scripts/cromwell/get_inputs_outputs.py +++ /dev/null @@ -1,77 +0,0 @@ -#!/bin/python - -import json -import argparse -import os - -# Synopsis: -# Generates JSON files with the inputs and outputs of every (sub)workflow -# -# Usage: -# python get_inputs_outputs.py workflow_metadata.json /output/dir -# -# Parameters: -# workflow_metadata.json : Workflow metadata file -# /output/dir : Directory to place logs -# -# Author: Mark Walker (markw@broadinstitute.org) - - -def get_subworkflows(m, alias): - if isinstance(m, list): - return get_subworkflows(m[0], alias) - - task = '' - if 'workflowName' in m: - task = m['workflowName'] - - # in a call - if not ('subWorkflowMetadata' in m or 'calls' in m): - return [] - - call_metadata = [] - if 'calls' in m: - for call in m['calls']: - call_metadata.extend(get_subworkflows(m['calls'][call], call)) - - if 'subWorkflowMetadata' in m: - call_metadata.extend(get_subworkflows(m['subWorkflowMetadata'], alias)) - - if ('inputs' in m or 'outputs' in m) and task: - 
call_metadata.append((m, task, alias)) - - return call_metadata - - -def write_files(workflow_metadata, output_dir): - for (m, task, alias) in workflow_metadata: - inputs_path = os.path.join(output_dir, alias + '.inputs.json') - outputs_path = os.path.join(output_dir, alias + '.outputs.json') - if 'inputs' in m: - inputs = {task + "." + key: val for key, val in m['inputs'].items() if val} - with open(inputs_path, 'w') as f: - f.write(json.dumps(inputs, sort_keys=True, indent=2)) - if 'outputs' in m: - outputs = {key: val for key, val in m['outputs'].items() if val} - with open(outputs_path, 'w') as f: - f.write(json.dumps(outputs, sort_keys=True, indent=2)) - - -# Main function -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("workflow_metadata", - help="Workflow metadata JSON file") - parser.add_argument("output_dir", help="Output directory") - args = parser.parse_args() - - metadata_file = args.workflow_metadata - output_dir = args.output_dir - - metadata = json.load(open(metadata_file, 'r')) - workflow_metadata = get_subworkflows(metadata, metadata['workflowName']) - write_files(workflow_metadata, output_dir) - - -if __name__ == "__main__": - main() diff --git a/scripts/cromwell/get_output_paths.py b/scripts/cromwell/get_output_paths.py deleted file mode 100644 index aaadfda69..000000000 --- a/scripts/cromwell/get_output_paths.py +++ /dev/null @@ -1,263 +0,0 @@ -#!/bin/python - -import argparse -import json -import logging -import re -import os.path -from urllib.parse import urlparse - -from google.cloud import storage - -""" -Summary: Find GCS paths for specified workflow file outputs for multiple workflows at once without downloading metadata. - -Caveats: Assumes cromwell file structure. Recommended for use with cromwell final_workflow_outputs_dir - to reduce number of files to search. Requires file suffixes for each output file that are - unique within the workflow directory. - -For usage & parameters: Run python get_output_paths.py --help - -Output: TSV file with columns for each output variable and a row for each - batch (or entity, if providing --entities-file), containing GCS output paths - -Author: Emma Pierce-Hoffman (epierceh@broadinstitute.org) -""" - - -def check_file_nonempty(f): - # Validate existence of file and that it is > 0 bytes - if not os.path.isfile(f): - raise RuntimeError("Required input file %s does not exist." % f) - elif os.path.getsize(f) == 0: - raise RuntimeError("Required input file %s is empty." % f) - - -def read_entities_file(entities_file): - # Get list of entities from -e entities file - entities = [] - if entities_file is not None: - # proceed with reading file - must not be None at this point - check_file_nonempty(entities_file) - with open(entities_file, 'r') as f: - for line in f: - entities.append(line.strip()) - return entities - - -def load_filenames(filenames): - # Read -f filenames / output names JSON - files_dict = json.load(open(filenames, 'r')) - output_names = sorted(files_dict.keys()) - if len(output_names) == 0: - raise ValueError("No output files to search for found in required -f/--filenames JSON %s." 
% filenames) - return files_dict, output_names - - -def split_bucket_subdir(directory): - # Parse -b URI input into top-level bucket name (no gs://) and subdirectory path - uri = urlparse(directory) - return uri.netloc, uri.path.lstrip("/") - - -def get_batch_dirs(workflows, workflow_id, directory): - # Return list of (batch_name, batch_subdirectory) and top-level bucket parsed from -b URI input - batches_dirs = [] # to hold tuples of (batch, dir) in order given in input - bucket, subdir = split_bucket_subdir(directory) - # If using -i input, just add workflow ID to subdirectory path and return - if workflow_id is not None: - return [("placeholder_batch", os.path.join(subdir, workflow_id))], bucket - # If using -w input, read workflows file to get batch names and workflow IDs - with open(workflows, 'r') as inp: - for line in inp: - if line.strip() == "": - continue - (batch, workflow) = line.strip().split('\t') - batch_dir = os.path.join(subdir, workflow) - batches_dirs.append((batch, batch_dir)) - return batches_dirs, bucket - - -def find_batch_output_files(batch, bucket, prefix, files_dict, output_names, num_outputs): - # Search batch directory for files with specified prefixes - - # Get all objects in directory - storage_client = storage.Client() - blobs = storage_client.list_blobs(bucket, prefix=prefix, - delimiter=None) # only one workflow per batch - assumes caching if multiple - - # Go through each object in directory once, checking if it matches any filenames not yet found - batch_outputs = {file: [] for file in output_names} - names_left = list(output_names) - num_found = 0 - for blob in blobs: - blob_name = blob.name.strip() - # in case multiple files, continue matching on suffixes even if already found file match(es) - for name in output_names: - if blob_name.endswith(files_dict[name]): - blob_path = os.path.join("gs://", bucket, blob_name) # reconstruct URI - if len(batch_outputs[name]) == 0: - num_found += 1 - names_left.remove(name) - batch_outputs[name].append(blob_path) - break - - # Warn if some outputs not found - if num_found < num_outputs: - for name in names_left: - logging.warning(f"{batch} output file {name} not found in gs://{bucket}/{prefix}. 
Outputting empty string") - - return batch_outputs - - -def sort_files_by_shard(file_list): - # Attempt to sort file list by shard number based on last occurrence of "shard-" in URI - if len(file_list) < 2: - return file_list - regex = r'^(shard-)([0-9]+)(/.*)' # extract shard number for sorting - group 2 - shard_numbers = [] - check_different_shard = None - for file in file_list: - index = file.rfind("shard-") # find index of last occurrence of shard- substring in file path - if index == -1: - return file_list # abandon sorting if no shard- substring - shard = int(re.match(regex, file[index:]).group(2)) - # make sure first two shard numbers actually differ - if check_different_shard is None: - check_different_shard = shard - elif check_different_shard != -1: - if shard == check_different_shard: - return file_list # if first two shard numbers match, then abandon sorting by shard - check_different_shard = -1 - shard_numbers.append(shard) - return [x for _, x in sorted(zip(shard_numbers, file_list), key=lambda pair: pair[0])] - - -def format_batch_line(batch, output_names, batch_outputs): - # Format line with batch and outputs (if not using entities option) - batch_line = batch + "\t" - batch_line += "\t".join(",".join(sort_files_by_shard(batch_outputs[name])) for name in output_names) - batch_line += "\n" - return batch_line - - -def update_entity_outputs(output_names, batch_outputs, entities, entity_outputs): - # Edit entity_outputs dict in place: add new batch outputs to each corresponding entity - for output_index, name in enumerate(output_names): - filepaths = batch_outputs[name] - filenames = [path.split("/")[-1] for path in filepaths] - for entity in entities: # not efficient but should be <500 entities and filenames to search - for i, filename in enumerate(filenames): - # cannot handle Array[File] output for one entity - if entity in filename and entity_outputs[entity][output_index] == "": - entity_outputs[entity][output_index] = filepaths[i] - entity_outputs[entity].append(filepaths[i]) - filenames.remove(filename) - filepaths.remove(filepaths[i]) - break - - -def write_entity_outputs(entity_outputs, keep_all_entities, entities, output_stream): - # Check, format, and write entity outputs - # do write inside function to be able to print line-by-line - for entity in entities: - # check for blank entities - if all(element == "" for element in entity_outputs[entity]): - if keep_all_entities: - logging.info(f"No output files found for entity '{entity}' in provided directories. " - f"Outputting blank entry. Remove -k argument to exclude empty entities.") - else: - logging.info(f"No output files found for entity '{entity}' in provided directories. " - f"Omitting from output. 
Use -k argument to include empty entities.") - continue - output_stream.write(entity + "\t" + "\t".join(entity_outputs[entity]) + "\n") - - -def retrieve_and_write_output_files(batches_dirs, bucket, files_dict, output_names, output_file, - entities, entity_type, keep_all_entities): - num_outputs = len(output_names) - num_entities = len(entities) - entity_outputs = {entity: [""] * num_outputs for entity in entities} # empty if entities is empty - logging.info("Writing %s" % output_file) - with open(output_file, 'w') as out: - out.write(entity_type + "\t" + "\t".join(output_names) + "\n") - for batch, batch_dir in batches_dirs: - logging.info("Searching for outputs for %s" % batch) - batch_outputs = find_batch_output_files(batch, bucket, batch_dir, files_dict, output_names, num_outputs) - if num_entities > 0: - update_entity_outputs(output_names, batch_outputs, entities, entity_outputs) - else: - batch_line = format_batch_line(batch, output_names, batch_outputs) - out.write(batch_line) - if num_entities > 0: - write_entity_outputs(entity_outputs, keep_all_entities, entities, out) - logging.info("Done!") - - -# Main function -def main(): - parser = argparse.ArgumentParser() - group = parser.add_mutually_exclusive_group(required=True) - group.add_argument("-w", "--workflows-file", - help="TSV file (no header) with batch (or sample) names and workflow IDs (one workflow " - "per batch). Either -i or -w required.") - group.add_argument("-i", "--workflow-id", - help="Workflow ID provided directly on the command line; alternative to -w if only " - "one workflow. Either -i or -w required.") - parser.add_argument("-f", "--filenames", required=True, - help="JSON file with workflow output file names (for column names in output TSV) and a " - "unique filename suffix expected for each workflow output. " - "Format is { \"output_file_name\": \"unique_file_suffix\" }.") - parser.add_argument("-o", "--output-file", required=True, help="Output file path to create") - parser.add_argument("-b", "--bucket", required=True, - help="Google bucket path to search for files - should include all subdirectories " - "preceding the workflow ID, including the workflow name.") - parser.add_argument("-l", "--log-level", required=False, default="INFO", - help="Specify level of logging information, ie. info, warning, error (not case-sensitive). " - "Default: INFO") - parser.add_argument("-e", "--entities-file", required=False, - help="Newline-separated text file of entity (ie. sample, batch) names (no header). " - "Entity here refers to units, like samples within a batch or batches within a cohort, " - "for which the workflow(s) produced outputs; the script expects one output per entity " - "for all outputs, with the filename containing the entity ID provided in the entities " - "file. Output will have one line per entity in the order provided. " - "If multiple batches, outputs will be concatenated and order may be affected.") - parser.add_argument("-t", "--entity-type", required=False, default="batch", - help="Entity type (ie. sample, batch) of each line of output. If using -e, then define " - "what each entity name in the file is (ie. a sample, a batch). Otherwise, define " - "what each workflow corresponds to. This type will be the first column name. 
" - "Default: batch") - parser.add_argument("-k", "--keep-all-entities", required=False, default=False, action='store_true', - help="With --entities-file, output a line for every entity, even if none of the " - "output files are found.") - args = parser.parse_args() - - # Set logging level from -l input - log_level = args.log_level - numeric_level = getattr(logging, log_level.upper(), None) - if not isinstance(numeric_level, int): - raise ValueError('Invalid log level: %s' % log_level) - logging.basicConfig(level=numeric_level, format='%(levelname)s: %(message)s') - - # Set required arguments. Validate existence of & read filenames JSON - filenames, output_file, bucket = args.filenames, args.output_file, args.bucket # required - check_file_nonempty(filenames) - files_dict, output_names = load_filenames(filenames) - - # Determine workflow IDs from -w or -i arguments. Get subdirectories - workflows, workflow_id = args.workflows_file, args.workflow_id - if workflows is not None: - check_file_nonempty(workflows) - batches_dirs, bucket = get_batch_dirs(workflows, workflow_id, bucket) - - # Set entity arguments and read entities file - entity_type, entities_file, keep_all_entities = args.entity_type, args.entities_file, args.keep_all_entities - entities = read_entities_file(entities_file) - - # Core functionality - retrieve_and_write_output_files(batches_dirs, bucket, files_dict, output_names, output_file, - entities, entity_type, keep_all_entities) - - -if __name__ == "__main__": - main() diff --git a/scripts/cromwell/launch_wdl.sh b/scripts/cromwell/launch_wdl.sh deleted file mode 100755 index 23aa8db4b..000000000 --- a/scripts/cromwell/launch_wdl.sh +++ /dev/null @@ -1,54 +0,0 @@ -#!/bin/bash -set -eu -o pipefail - -WDL=$(realpath $1) -CONFIG_FILE=${2:-"$HOME/code/cromwell/cromwell_workflow_options.json"} -VALIDATE=${VALIDATE:-false} - - -SCRIPT_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" -GATK_SV_ROOT=$SCRIPT_DIR -while [ $(basename "$GATK_SV_ROOT") != gatk-sv ]; do - GATK_SV_ROOT=$(dirname "$GATK_SV_ROOT") -done - -WDL_FILENAME=$(basename "$WDL") -WDL_NAME=${WDL_FILENAME%.*} - - -RUN_DIR="$GATK_SV_ROOT/runs/$WDL_NAME" -DEPS_ZIP="$RUN_DIR/deps.zip" -rm -rf "$RUN_DIR" -mkdir -p "$RUN_DIR" -cd "$(dirname $WDL)" -zip "$DEPS_ZIP" *.wdl &> /dev/null -cd "$GATK_SV_ROOT" -"$GATK_SV_ROOT/scripts/inputs/build_default_inputs.sh" \ - -d "$GATK_SV_ROOT" \ - > /dev/null - - -echo "Available input jsons:" -printf "%d\t%s\n" 0 "none (skip cromwell submit)" -n=1 -JSON_ARRAY=() -while read INPUT_JSON; do - # printf "%d\t%s\n" $n $(basename "$INPUT_JSON") - printf "%d\t%s\n" $n "$INPUT_JSON" - JSON_ARRAY["$n"]="$INPUT_JSON" - n=$((n+1)) -done < <(find "$GATK_SV_ROOT/inputs/build" -name "$WDL_NAME"*.json | sort) - -read -p "Select input json number: " INPUT_JSON_NUMBER -if [[ $INPUT_JSON_NUMBER == 0 ]]; then - exit 0 -fi -INPUT_JSON=${JSON_ARRAY[$INPUT_JSON_NUMBER]} -cp "$INPUT_JSON" "$RUN_DIR" - -if $VALIDATE; then - womtool validate $WDL -i "$INPUT_JSON" -else - cromshell submit "$WDL" "$INPUT_JSON" "$CONFIG_FILE" "$DEPS_ZIP" -fi - diff --git a/scripts/cromwell/watch_cromshell.sh b/scripts/cromwell/watch_cromshell.sh deleted file mode 100755 index 5b78b564e..000000000 --- a/scripts/cromwell/watch_cromshell.sh +++ /dev/null @@ -1,167 +0,0 @@ -#!/bin/bash -# Usage: watch_cromshell WORKFLOW_ID [sleep_time_sec] -# At regular polling interval, call cromshell-execution-status to get -# status of workflow / subworkflows. If any shards are failing their -# shard number will be listed. 
Terminates upon completion of workflow. -# -If cromshell is not located on your PATH, then define the ENV -# variable CROMSHELL with the appropriate path. -# -Currently sub-sub-workflows are probably not handled correctly and -# failing shards may incorrectly indicated in this scenario. - -# having problems with this while debugging / improving. Eventually -# should reinstate: -#set -Eeuo pipefail - -WORKFLOW_ID=$1 -SLEEP_TIME=${2:-60} -CROMSHELL=${CROMSHELL:-"cromshell"} - -WORKFLOW_RUNNING=true -FAILED_SHARDS="" -NUM_FAILED=0 - - -function handle_status() { - ID=$1 - DATE_STAMP=$(date) - WF_STATUS_COUNT=$($CROMSHELL execution-status-count $ID 2> /dev/null) - if [ -z "$WF_STATUS_COUNT" ] || [ "$WF_STATUS_COUNT" == "[]" ]; then - # no status count is available. Possibly the job is just starting - # up, or possibly there's an error - WORKFLOW_STATUS="$($CROMSHELL status $ID 2>/dev/null | jq '."status"')" - echo "WORKFLOW_STATUS=$WORKFLOW_STATUS" - if [[ "$WORKFLOW_STATUS" =~ Submitted|Starting|Running ]]; then - # if there are no status values, then the job status may - # still be "Submitted". Check for that or "Running" (in case - # status changes between the two checks. If it's either of - # these, wait another sleep cycle to check status - return 0 - else - # the workflow is in an error state, but there is no data - # about tasks. Probably an error with the WDL - echo "Overall workflow failed with no execution status count" - echo "Error messages:" - METADATA=$($CROMSHELL metadata $WORKFLOW_ID 2>/dev/null) - echo "$METADATA" | grep '"message":' - WORKFLOW_RUNNING=false - return 1 - fi - fi - NUM_TASKS=$(echo "$WF_STATUS_COUNT" | jq 'length') - if [ $NUM_TASKS == 0 ]; then - # haven't started yet, keep waiting - return 0 - fi - echo "$DATE_STAMP" - NUM_TASKS_RUNNING=0 - for ((TASK_IND = 0; TASK_IND < NUM_TASKS; TASK_IND++)); do - TASK_RUNNING=true - TASK_STATUS_COUNT=$(echo "$WF_STATUS_COUNT" | jq ".[$TASK_IND]") - check_status $ID "$TASK_STATUS_COUNT" $TASK_IND - if $TASK_RUNNING; then - ((NUM_TASKS_RUNNING++)) - fi - done - if [[ $NUM_TASKS_RUNNING == 0 ]]; then - WORKFLOW_RUNNING=false - else - echo - fi -} - -function check_status() { - ID=$1 - STATUS_COUNT="$2" - TASK_NAME=$(echo "$STATUS_COUNT" | jq "keys | .[]") - STATUS_VALUES=$(\ - echo "$STATUS_COUNT" \ - | grep -Ev "[]{}[]" | sed -e 's/[":,]//g' -e 's/^ *//' \ - | awk '{print $2 " " $1}' | paste -s -d, - | sed -e 's/,/, /g'\ - ) - NUM_WORKFLOWS=$(\ - echo "$STATUS_VALUES" \ - | awk ' BEGIN { - FS = "[, \t]+" - } { - for (i=1; i/dev/null $CROMSHELL metadata $WORKFLOW_ID) - # get failed shard info by parsing metadata JSON: - # 1) a) get status of each subworkflow as an array - SUBWORKFLOW_STATUS=$( \ - echo "$METADATA" \ - | jq " - .calls.$TASK_NAME - | .[].subWorkflowMetadata.status - " \ - ) - if [ "$(echo "$SUBWORKFLOW_STATUS" | uniq)" == "null" ]; then - # not calling subworkflows, calling tasks, so - # 1) b) get status of each call as an array - # c) filtered out "Preempted" statuses - CALL_STATUS=$( \ - echo "$METADATA" \ - | jq " - .calls.$TASK_NAME - | .[].backendStatus - | select(. != \"Preempted\") - " \ - ) - else - CALL_STATUS=$SUBWORKFLOW_STATUS - fi - # 2) get the indices of "Failed" statuses as an array - # 3) filter out calls that had no failed indices - FAILED_SHARDS=$( \ - echo "$CALL_STATUS" \ - | jq -s ' - indices("Failed") - | tostring - ' \ - | sed 's/[][]//g' \ - ) - NUM_FAILED=$NUM_NEW_FAILED - fi - if [[ ! -z "$FAILED_SHARDS" ]]; then - echo " Failed shards: $FAILED_SHARDS" - fi - fi - - if ! 
echo "$STATUS_VALUES" | grep -qE "Running|Starting|Submitted"; then - # no more running, don't need to watch any longer - TASK_RUNNING=false - fi -} - -echo "using workflow-id == $WORKFLOW_ID" -echo -handle_status $WORKFLOW_ID -while $WORKFLOW_RUNNING; do - sleep $SLEEP_TIME - handle_status $WORKFLOW_ID -done - -# could give elapsed time with some effort -METADATA=${METADATA:-$(2>/dev/null $CROMSHELL metadata $WORKFLOW_ID)} -START_TIME=$(echo "$METADATA" | grep start | grep -v description \ - | cut -d'"' -f4 | sort | head -n1 \ - | sed -e 's/[A-z]$//' -e 's/[A-z]/ /g') -END_TIME=$(echo "$METADATA" | grep -E '("end"|"endTime")' \ - | cut -d'"' -f4 | sort | tail -n1 \ - | sed -e 's/[A-z]$//' -e 's/[A-z]/ /g') -echo "Start: $START_TIME" -echo "End: $END_TIME" diff --git a/scripts/inputs/calibrate_qc_metrics.py b/scripts/inputs/calibrate_qc_metrics.py deleted file mode 100644 index 0d265e12a..000000000 --- a/scripts/inputs/calibrate_qc_metrics.py +++ /dev/null @@ -1,59 +0,0 @@ -#!/bin/python - -# Synopsis: -# Creates a new QC definitions file (valid ranges) from a given Batch run's QC metrics output -# - -import argparse -import math - - -def get_number(val): - try: - return int(val) - except: - try: - return float(val) - except: - return val - - -def get_metric_range(val, args): - val = get_number(val) - if isinstance(val, float): - if math.isnan(val): - return None - else: - return val * (1.0 - args.range), val * (1.0 + args.range) - elif isinstance(val, int): - if val < args.integer_zero_lower: - return 0, args.integer_zero_upper - else: - return round(val * (1.0 - args.range)), round(val * (1.0 + args.range)) - else: - return None - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("metrics_file_batch", type=str, - help="GATKSVPipelineBatch metrics output file") - parser.add_argument("--integer-zero-lower", type=int, default=10, - help="Integer metrics below this value will have their minimum " - "value set to 0 and maximum value set to --integer-zero-upper") - parser.add_argument("--integer-zero-upper", type=int, default=11, - help="See --integer-zero-lower") - parser.add_argument("--range", type=float, default=0.1, - help="Fraction of metric value to define valid range (+/-)") - args = parser.parse_args() - - with open(args.metrics_file_batch, 'r') as f: - for line in f: - key, val = line.rstrip().split('\t') - range = get_metric_range(val, args) - if range is not None: - print(f"{key}\t{range[0]}\t{range[1]}") - - -if __name__ == "__main__": - main() diff --git a/scripts/inputs/convert_sample_ids.py b/scripts/inputs/convert_sample_ids.py new file mode 100644 index 000000000..88040ad6d --- /dev/null +++ b/scripts/inputs/convert_sample_ids.py @@ -0,0 +1,102 @@ +#!/usr/bin/env python + +""" +Converts external sample IDs to safe GATK-SV IDs + +""" + +import argparse +import hashlib +import re +import sys + + +DEFAULT_HASH_LENGTH = 6 +ID_FORMAT = "__{id_name:s}__{id_hash:s}" + + +def convert_id(sample_id, hash_input, hash_length): + if sample_id is None or sample_id == "": + raise ValueError("Encountered None-type or empty id") + if "__" in sample_id: + raise ValueError("Encountered double-underscore in sample id: {:s}".format(sample_id)) + hash_fn = hashlib.sha1() + hash_fn.update(hash_input.encode(encoding='UTF-8', errors='strict')) + id_hash = hash_fn.hexdigest()[:hash_length] + id_no_special_chars = re.sub('[^0-9a-zA-Z]+', '_', sample_id).lower() + return ID_FORMAT.format(id_name=id_no_special_chars, id_hash=id_hash) + + +def convert_ids_list(external_ids, 
hash_input, hash_length): + if len(external_ids) != len(hash_input): + raise ValueError("There were {:d} external ids but {:d} hash inputs".format(len(external_ids), len(hash_input))) + zipped = zip(external_ids, hash_input) + return [convert_id(sample_id=z[0], hash_input=z[1], hash_length=hash_length) for z in zipped] + + +def read_list_file(path): + with open(path, 'r') as f: + entries = f.read().splitlines() + if len(entries) == 0: + raise ValueError("List empty: {}".format(path)) + return entries + + +def test_ids(external_ids, converted_ids, hash_input, skip_substring): + num_external_ids = len(external_ids) + num_converted_ids = len(converted_ids) + if num_external_ids != num_converted_ids: + raise ValueError("Number of external ids was {:d} but there were {:d} converted ids" + .format(num_external_ids, num_converted_ids)) + + num_hash_inputs = len(hash_input) + num_unique_hash_inputs = len(set(hash_input)) + if num_hash_inputs != num_unique_hash_inputs: + raise ValueError("{:d} hash inputs were provided but only {:d} were unique".format(num_hash_inputs, num_unique_hash_inputs)) + + num_unique_converted_ids = len(set(converted_ids)) + if num_unique_converted_ids != num_converted_ids: + raise ValueError("There are {:d} converted ids but only {:d} are unique".format(num_converted_ids, num_unique_converted_ids)) + + # Check if any converted ID is a substring of another (slow naive approach) + if not skip_substring: + for i in range(num_converted_ids): + other_ids = " ".join(converted_ids[:i] + converted_ids[(i + 1):]) + if converted_ids[i] in other_ids: + for j in range(num_converted_ids): + if j != i and converted_ids[i] in converted_ids[j]: + raise ValueError("Conflicting substring in converted ids: \"{:s}\" \"{:s}\"".format(converted_ids[i], converted_ids[j])) + if (i + 1) % 1000 == 0: + sys.stderr.write("Checked for substring collisions in {} / {} converted ids\n".format(i + 1, num_converted_ids)) + + +def write_ids(converted_ids): + for i in converted_ids: + print(i) + + +def main(): + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter) + parser.add_argument('sample_list', type=str, help="Newline-delimited list of external sample IDs") + parser.add_argument('--hash-input', type=str, required=False, + help="Newline-delimited list of corresponding unique strings to hash [default sample_list]") + parser.add_argument('--hash-length', type=int, required=False, default=DEFAULT_HASH_LENGTH, + help="Appended hash length in characters [default {:d}]".format(DEFAULT_HASH_LENGTH)) + parser.add_argument('--skip-substring-check', action='store_true', + help="Skip converted id substring check, which is slow") + args = parser.parse_args() + + external_ids = read_list_file(args.sample_list) + if args.hash_input is not None: + hash_input = read_list_file(args.hash_input) + else: + hash_input = external_ids + converted_ids = convert_ids_list(external_ids=external_ids, hash_input=hash_input, hash_length=args.hash_length) + test_ids(external_ids=external_ids, converted_ids=converted_ids, hash_input=hash_input, skip_substring=args.skip_substring_check) + write_ids(converted_ids) + + +if __name__ == '__main__': + main() diff --git a/scripts/inputs/create_test_batch.py b/scripts/inputs/create_test_batch.py deleted file mode 100644 index fd7d4e3f1..000000000 --- a/scripts/inputs/create_test_batch.py +++ /dev/null @@ -1,132 +0,0 @@ -#!/bin/python - -# Synopsis: -# Creates input values for a new test batch from a GATKSVPipelineBatch run -# - 
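As an aside, a minimal sketch (with a hypothetical sample ID) of the conversion performed by the convert_sample_ids.py script added above: non-alphanumeric characters become underscores, the name is lower-cased, and a short sha1 prefix of the hash input (by default the ID itself) is appended per ID_FORMAT.

    import hashlib
    import re

    sample_id = "NA12878-A"  # hypothetical external ID; hash input defaults to the ID itself
    id_hash = hashlib.sha1(sample_id.encode("UTF-8")).hexdigest()[:6]
    id_name = re.sub('[^0-9a-zA-Z]+', '_', sample_id).lower()
    # formatted per the ID_FORMAT shown above, e.g. a sanitized name plus a 6-character hash
    print("__{id_name:s}__{id_hash:s}".format(id_name=id_name, id_hash=id_hash))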
-import argparse -import json -import sys -import os -import tempfile -from google.cloud import storage -from urllib.parse import urlparse - - -INPUT_KEYS = set([ - "name", - "samples", - "bam_or_cram_files", - "requester_pays_crams", - "ped_file", - "contig_ploidy_model_tar", - "gcnv_model_tars", - "qc_definitions", - "outlier_cutoff_table" -]) - - -FILE_LIST_KEYS = set([ - "PE_files", - "SR_files", - "samples", - "std_manta_vcfs", - "std_wham_vcfs", - "std_melt_vcfs", - "std_scramble_vcfs", - "gcnv_model_tars" -]) - - -def replace_output_dir_maybe_list(value, execution_bucket, outputs_dir): - if outputs_dir is None: - return value - if isinstance(value, list): - return [replace_output_dir(v, execution_bucket, outputs_dir) for v in value] - else: - return replace_output_dir(value, execution_bucket, outputs_dir) - - -def replace_output_dir(value, execution_bucket, outputs_dir): - if execution_bucket not in value: - raise ValueError(f"Execution bucket {execution_bucket} not found in output: {value}") - return value.replace(execution_bucket, outputs_dir) - - -def split_bucket_subdir(directory): - # Parse -b URI input into top-level bucket name (no gs://) and subdirectory path - uri = urlparse(directory) - return uri.netloc, uri.path.lstrip("/") - - -def create_file_list(list_key, values_list, file_list_bucket): - # create tmp file with paths list - list_suffix = ".txt" - fd, fname = tempfile.mkstemp(suffix=list_suffix) - with os.fdopen(fd, 'w') as listfile: - listfile.write("\n".join(values_list)) - - # upload file to GCS - client = storage.Client() - dest_bucket, subdir = split_bucket_subdir(file_list_bucket) - bucket = client.get_bucket(dest_bucket) - gcs_blob_name = os.path.join(subdir, list_key + list_suffix) - blob = bucket.blob(gcs_blob_name) - blob.upload_from_filename(fname) - - # delete tmp file - os.unlink(fname) - - # return the GCS path of the paths list file - return os.path.join("gs://", dest_bucket, gcs_blob_name) - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("metadata", help="GATKSVPipelineBatch metadata JSON file") - parser.add_argument("--final-workflow-outputs-dir", help="If used, final_workflow_outputs_dir " - "option from Cromwell config file") - parser.add_argument("--execution-bucket", help="Cromwell execution bucket, required if " - "using --final-workflow-outputs-dir") - parser.add_argument("--file-list-bucket", help="Bucket to which to upload file lists") - args = parser.parse_args() - - execution_bucket = args.execution_bucket - outputs_dir = args.final_workflow_outputs_dir - file_list_bucket = args.file_list_bucket - if outputs_dir is not None: - if execution_bucket is None: - raise ValueError("Must supply --execution-bucket if using --final-workflow-outputs-dir") - if not execution_bucket.startswith("gs://"): - raise ValueError("--execution-bucket must start with gs://") - if not outputs_dir.startswith("gs://"): - raise ValueError("--final-workflow-outputs-dir must start with gs://") - if execution_bucket.endswith('/'): - execution_bucket = execution_bucket[:-1] - if outputs_dir.endswith('/'): - outputs_dir = outputs_dir[:-1] - - with open(args.metadata, 'r') as f: - metadata = json.load(f) - values = {key.replace("GATKSVPipelineBatch.", ""): - replace_output_dir_maybe_list(value, execution_bucket, outputs_dir) - for key, value in metadata["outputs"].items() if value is not None} - inputs = metadata["inputs"] - for raw_key in set(inputs.keys()).intersection(INPUT_KEYS): - key = raw_key.split('.')[-1] - values[key] = inputs[key] - for 
key in INPUT_KEYS - set(values.keys()): - sys.stderr.write(f"Warning: expected workflow input '{key}' not found in metadata. You will need to add " - f"this entry manually.\n") - values[key] = None - - if file_list_bucket is not None: - for key in set(values.keys()).intersection(FILE_LIST_KEYS): - list_key = key + "_list" - values[list_key] = create_file_list(list_key, values[key], file_list_bucket) - - print(json.dumps(values, sort_keys=True, indent=4)) - - -if __name__ == "__main__": - main() diff --git a/scripts/inputs/get_rename_benchmark_samples_map.py b/scripts/inputs/get_rename_benchmark_samples_map.py deleted file mode 100755 index 7b7bbf4fa..000000000 --- a/scripts/inputs/get_rename_benchmark_samples_map.py +++ /dev/null @@ -1,125 +0,0 @@ -#!/usr/bin/env python - -import sys -import os -import warnings -import argparse -import tarfile -import pysam -import numpy -from typing import List, Text, Dict, Mapping, Iterable, Iterator, Callable, Tuple - -# define type for function that matches sample IDs between cohort VCF and benchmark tar file -SampleIdMatcher = Callable[[List[str], List[str]], Iterator[Tuple[str, str]]] - - -def __parse_arguments(argv: List[Text]) -> argparse.Namespace: - - # noinspection PyTypeChecker - parser = argparse.ArgumentParser( - description="Get TSV file with mapping from sample IDs in external benchmarking set to corresponding sample IDs" - "in cohort VCF", - formatter_class=argparse.ArgumentDefaultsHelpFormatter, - prog=argv[0] - ) - parser.add_argument("benchmark_tar", type=str, help="Tar file with bed files used for per-sample benchmarking") - parser.add_argument("cohort_vcf", type=str, help="Cohort VCF to be used in MainVcfQc") - parser.add_argument("save_tsv_file", type=str, help="File to save samples map in TSV format") - - parsed_arguments = parser.parse_args(argv[1:] if len(argv) > 1 else ["--help"]) - return parsed_arguments - - -def match_benchmark_contains_vcf_case_insensitive( - benchmark_sample_ids: Iterable[str], vcf_sample_ids: Iterable[str], -) -> Iterator[Tuple[str, str]]: - """ SampleIdMatcher that assumes benchmark sample IDs contain VCF sample IDs, possibly in a different case """ - def _make_searchable(_sample_ids: Iterable[str]) -> (List[str], List[str]): - # given iterable of sample IDs, make version that is searchable (lower-case, with underscores removed), and sort - # both the searchable sample IDs and originals according to the order of the searchable list - return zip( - *sorted([(_sample_id, _sample_id.lower().strip('_')) for _sample_id in _sample_ids], - key=lambda tup: tup[1]) - ) - benchmark_sample_ids, searchable_benchmark_ids = _make_searchable(benchmark_sample_ids) - vcf_sample_ids, searchable_vcf_sample_ids = _make_searchable(vcf_sample_ids) - for vcf_ind, (benchmark_ind, searchable_benchmark_id) in zip( - numpy.searchsorted(searchable_vcf_sample_ids, searchable_benchmark_ids, side="left"), - enumerate(searchable_benchmark_ids) - ): - if searchable_vcf_sample_ids[vcf_ind] in searchable_benchmark_id: - yield benchmark_sample_ids[benchmark_ind], vcf_sample_ids[vcf_ind] - elif vcf_ind > 0 and searchable_vcf_sample_ids[vcf_ind - 1] in searchable_benchmark_id: - yield benchmark_sample_ids[benchmark_ind], vcf_sample_ids[vcf_ind - 1] - else: - warnings.warn(f"{searchable_benchmark_id} has no match") - - -def main(arguments: List[str]): - """ Get command-line arguments, create map from benchmark sample ID to cohort VCF sample ID, save as TSV """ - options = __parse_arguments(arguments) - samples_map = 
get_rename_benchmark_samples_map( - benchmark_tar=options.benchmark_tar, - cohort_vcf=options.cohort_vcf, - get_sample_id_matches=match_benchmark_contains_vcf_case_insensitive - ) - save_samples_map_tsv(samples_map=samples_map, save_tsv_file=options.save_tsv_file) - - -def get_rename_benchmark_samples_map( - benchmark_tar: str, - cohort_vcf: str, - get_sample_id_matches: SampleIdMatcher = match_benchmark_contains_vcf_case_insensitive -) -> Dict[str, str]: - """ - Get mapping from benchmark sample IDs to cohort sample IDs. This function is defined with passable matching function - in case future cohort VCFs / benchmark data sets require a different logic. - Args: - benchmark_tar: str - path to tar file with benchmarking BED files - cohort_vcf: str - path to cohort VCF - get_sample_id_matches: SampleIdMatcher (default=match_benchmark_startswith_vcf_lower) - Callable that takes list of sample IDs from the benchmark set and a list of sample IDs from the cohort VCF, - and yields tuples of matching (benchmark_sample_id, cohort sample_id) pairs - Returns: - benchmark_samples_map: Dict[str, str] - Mapping from benchmark sample ID to corresponding cohort VCF sample ID - """ - benchmark_sample_ids = get_benchmark_sample_ids(benchmark_tar) - vcf_sample_ids = get_vcf_sample_ids(cohort_vcf) - return { - benchmark_id: vcf_id for benchmark_id, vcf_id in get_sample_id_matches(benchmark_sample_ids, vcf_sample_ids) - } - - -def get_vcf_sample_ids(vcf: str) -> List[str]: - """ Extract sample IDs from VCF """ - with pysam.VariantFile(vcf, 'r') as f_in: - return list(f_in.header.samples) - - -def get_benchmark_sample_ids(benchmark_tar: str) -> List[str]: - """ Extract sample IDs from benchmark tar file """ - return [os.path.basename(bed_file).split('.', 1)[0] for bed_file in get_tar_files(benchmark_tar) - if bed_file.endswith(".bed.gz")] - - -def get_tar_files(path_to_tar_file: str) -> List[str]: - """ Get contents of tar file """ - with tarfile.open(path_to_tar_file, 'r') as tar_in: - return [tar_info.name for tar_info in tar_in.getmembers()] - - -def save_samples_map_tsv( - samples_map: Mapping[str, str], - save_tsv_file: str -): - """ Save mapping from benchmark sample ID to cohort sample ID in TSV format """ - with open(save_tsv_file, 'w') as f_out: - for benchmark_sample_id, vcf_sample_id in samples_map.items(): - f_out.write(f"{benchmark_sample_id}\t{vcf_sample_id}\n") - - -if __name__ == "__main__": - main(sys.argv) diff --git a/scripts/test/check_gs_urls.py b/scripts/test/check_gs_urls.py deleted file mode 100644 index 698929e5c..000000000 --- a/scripts/test/check_gs_urls.py +++ /dev/null @@ -1,77 +0,0 @@ -#!/bin/python - -from google.cloud import storage -from google.api_core import exceptions -import json -import argparse -from urllib.parse import urlparse - -# Synopsis: -# This script checks if all GCS URLs in a Cromwell input JSON file exist. URLs that do not exist are printed to stdout. -# -# Requirements: -# Python >= 3.5 -# Google Cloud Storage Python API -# - Install with "pip install google-cloud-storage" -# -# Usage: -# python check_gs_urls.py inputs.json -# -# Parameters: -# inputs.json : workflow input file -# -# Author: Mark Walker (markw@broadinstitute.org) - -# URI scheme for Cloud Storage. 
-GOOGLE_STORAGE = 'gs' - -# Checks if the string is a Google bucket URL - - -def is_gcs_url(str): - return urlparse(str).scheme == GOOGLE_STORAGE - -# Checks if the object exists in GCS - - -def check_gcs_url(source_uri, client, project_id): - def _parse_uri(uri): - parsed = urlparse(uri) - bucket_name = parsed.netloc - bucket_object = parsed.path[1:] - return bucket_name, bucket_object - source_bucket_name, source_blob_name = _parse_uri(source_uri) - source_bucket = client.bucket(source_bucket_name, user_project=project_id) - source_blob = source_bucket.blob(source_blob_name) - try: - if not source_blob.exists(): - print(f"{source_uri} not found") - return False - except exceptions.BadRequest as e: - print(e) - return True - -# Main function - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("inputs_json") - parser.add_argument("--project-id", required=False, - help="Project ID to charge for requester pays buckets") - args = parser.parse_args() - - with open(args.inputs_json, 'r') as f: - client = storage.Client() - inputs = json.load(f) - for x in inputs: - if isinstance(inputs[x], str) and is_gcs_url(inputs[x]): - check_gcs_url(inputs[x], client, args.project_id) - elif isinstance(inputs[x], list): - for y in inputs[x]: - if is_gcs_url(y): - check_gcs_url(y, client, args.project_id) - - -if __name__ == "__main__": - main() diff --git a/scripts/test/compare_files.py b/scripts/test/compare_files.py deleted file mode 100644 index 1ac77b6c2..000000000 --- a/scripts/test/compare_files.py +++ /dev/null @@ -1,274 +0,0 @@ -import argparse -import gzip -import json -import os -from metadata import ITaskOutputFilters, Metadata -from subprocess import DEVNULL, STDOUT, check_call - - -# For coloring the prints; see the following SO -# answer for details: https://stackoverflow.com/a/287944/947889 -COLOR_ENDC = "\033[0m" -COLOR_ULINE = "\033[04m" -COLOR_BLINKING = "\033[05m" -COLOR_RED = "\033[91m" -COLOR_GREEN = "\033[92m" -COLOR_YELLOW = "\033[93m" - - -class FilterBasedOnExtensions(ITaskOutputFilters): - - def __init__(self, extensions): - self.extensions = extensions - - def filter(self, metadata, outputs): - """ - Iterates through the outputs of a task and - filters the outputs whose file type match - the types subject to comparison (i.e., - types defined in filetypes_to_compare). - - :return: An array of the filtered outputs. - """ - filtered_outputs = {} - if not isinstance(outputs, list): - outputs = [outputs] - - for task_output in outputs: - if not isinstance(task_output, str): - # Happens when output is not a file, - # e.g., when it is a number. - continue - for ext in self.extensions: - if task_output.endswith(ext): - if ext not in filtered_outputs: - filtered_outputs[ext] = [] - filtered_outputs[ext].append(task_output) - return filtered_outputs - - -class BaseCompareAgent: - def __init__(self, working_dir=None): - self.working_dir = working_dir - - def get_filename(self, obj): - return obj.replace("gs://", os.path.join(self.working_dir, "")) - - def get_obj(self, obj): - """ - Ensures the given Google Cloud Storage - object (obj) is available in the working - directory, and returns its filename in - the working directory. 
- """ - raise NotImplementedError - - -class VCFCompareAgent(BaseCompareAgent): - def __init__(self, working_dir=None): - super(VCFCompareAgent, self).__init__(working_dir) - - # Delimiter - self.d = "\t" - self.id_col = 2 - - def get_obj(self, obj): - """ - Ensures the given VCF object is - available in the working directory: - if it exists, returns its filename, and - If it does not, downloads the VCF object and - returns its filename. - """ - filename = self.get_filename(obj) - if not os.path.isfile(filename): - if not os.path.isfile(filename): - check_call( - ["gsutil", "-m", "cp", obj, filename], - stdout=DEVNULL, stderr=STDOUT) - return filename - - def equals(self, x, y): - """ - Gets two VCF objects (Google Cloud Storage URI), - x and y, and returns true if files are identical, - and false if otherwise. Additionally, it returns the - compared files. - """ - x = self.get_obj(x) - y = self.get_obj(y) - - with gzip.open(x, "rt", encoding="utf-8") as X, \ - gzip.open(y, "rt", encoding="utf-8") as Y: - for x_line, y_line in zip(X, Y): - if x_line.startswith("#") and y_line.startswith("#"): - continue - - x_columns = x_line.strip().split(self.d) - y_columns = y_line.strip().split(self.d) - - if len(x_columns) != len(y_columns): - return False, x, y - - if any(x_columns[c] != y_columns[c] - for c in range(0, len(x_columns)) - if c != self.id_col): - return False, x, y - return True, x, y - - -class CompareWorkflowOutputs: - def __init__(self, working_dir): - self.working_dir = working_dir - self.filetypes_to_compare = { - "vcf.gz": VCFCompareAgent(self.working_dir) - } - - def get_mismatches(self, reference_metadata, - target_metadata, - traverse_sub_workflows=False): - """ - Takes two metadata files (both belonging to a common - workflow execution), iterates through the outputs of - their task, downloads the objects if not already exist - in the working directory, compares the corresponding - files, and returns the files that do not match. - """ - def record_compare_result(match, reference, target): - if not match: - if call not in mismatches: - mismatches[call] = [] - mismatches[call].append([reference, target]) - - # First we define a method that takes a list - # of a task outputs, and keeps only those that - # are files and their extension match the - # file types that we want to compare - # (e.g., filter only VCF files). - filter_method = FilterBasedOnExtensions( - self.filetypes_to_compare.keys()).filter - - # Then we create two instances of the Metadata - # class, one for each metadata file, and we - # invoke the `get_outputs` method which traverses - # the outputs of task, and returns those filtered - # by the above-defined filter. 
- ref_output_files = Metadata(reference_metadata).get_outputs( - traverse_sub_workflows, filter_method) - test_output_files = Metadata(target_metadata).get_outputs( - traverse_sub_workflows, filter_method) - - mismatches = {} - i = 0 - - r_t = ref_output_files.keys() - test_output_files.keys() - t_r = test_output_files.keys() - ref_output_files.keys() - if r_t or t_r: - print(f"\n{COLOR_BLINKING}WARNING!{COLOR_ENDC}") - print(f"The reference and test metadata files differ " - f"in their outputs; " - f"{COLOR_ULINE}the differences will be skipped.{COLOR_ENDC}") - if r_t: - print(f"\t{len(r_t)}/{len(ref_output_files.keys())} " - f"outputs of the reference are not in the test:") - for x in r_t: - print(f"\t\t- {x}") - if t_r: - print(f"\t{len(t_r)}/{len(test_output_files.keys())} " - f"outputs of the test are not in the reference:") - for x in t_r: - print(f"\t\t- {x}") - print("\n") - - [ref_output_files.pop(x) for x in r_t] - print(f"{COLOR_YELLOW}Comparing {len(ref_output_files)} " - f"files that are common between reference and test " - f"metadata files and their respective task is executed " - f"successfully.{COLOR_ENDC}") - for call, ref_outputs in ref_output_files.items(): - i += 1 - matched = True - print(f"Comparing\t{i}/{len(ref_output_files)}\t{call} ... ", end="") - for extension, objs in ref_outputs.items(): - if len(objs) != len(test_output_files[call][extension]): - record_compare_result(False, objs, test_output_files[call][extension]) - matched = False - continue - for idx, obj in enumerate(objs): - equals, x, y = \ - self.filetypes_to_compare[extension].equals( - obj, test_output_files[call][extension][idx]) - record_compare_result(equals, x, y) - if not equals: - matched = False - if matched: - print(f"{COLOR_GREEN}match{COLOR_ENDC}") - else: - print(f"{COLOR_RED}mismatch{COLOR_ENDC}") - return mismatches - - -def main(): - parser = argparse.ArgumentParser( - description="Takes two cromwell metadata files as input, " - "reference and target, compares their corresponding " - "output files, and reports the files that do not match. " - "The two metadata files should belong to the execution " - "of a common workflow (e.g., one workflow with different " - "inputs). The script requires `gsutil` and `gzip` to be " - "installed and configured. If the output of a task is an " - "array of files, the reference and target arrays are " - "expected to be in the same order." - "\n\n" - "The currently supported file types are as follows." - "\n\t- VCF (.vcf.gz): The non-header lines of VCF files" - "are compared; except for the ID column, all the other " - "columns of a variation are expected to be identical. " - "The two files are expected to be equally ordered (i.e., " - "n-th variation in one file is compared to the " - "n-th variation on the other file).", - formatter_class=argparse.RawTextHelpFormatter) - - parser.add_argument( - "reference_metadata", - help="Reference cromwell metadata file.") - parser.add_argument( - "target_metadata", - help="Target cromwell metadata file.") - parser.add_argument( - "-w", "--working_dir", - help="The directory where the files will " - "be downloaded; default is the " - "invocation directory.") - parser.add_argument( - "-o", "--output", - help="Output file to store mismatches " - "(in JSON format); defaults to `output.json`.") - parser.add_argument( - "-d", "--deep", - action="store_true", - help="Include sub-workflows traversing the metadata files.") - - args = parser.parse_args() - - wd = args.working_dir if args.working_dir else "." 
- comparer = CompareWorkflowOutputs(wd) - mismatches = comparer.get_mismatches( - args.reference_metadata, - args.target_metadata, - args.deep) - - if len(mismatches) == 0: - print(f"{COLOR_GREEN}All the compared files matched.{COLOR_ENDC}") - else: - print(f"{COLOR_RED}{len(mismatches)} of the compared files did not match.{COLOR_ENDC}") - output_file = \ - args.output if args.output else \ - os.path.join(wd, "output.json") - with open(output_file, "w") as f: - json.dump(mismatches, f, indent=2) - print(f"Mismatches are persisted in {output_file}.") - - -if __name__ == '__main__': - main() diff --git a/scripts/test/metadata.py b/scripts/test/metadata.py deleted file mode 100644 index ffb048e0e..000000000 --- a/scripts/test/metadata.py +++ /dev/null @@ -1,129 +0,0 @@ -import json -import types -from abc import ABC, abstractmethod - - -class ITaskOutputFilters(ABC): - """ - An interface that should be implemented by - custom filtering methods to be used with Metadata. - - This design follows the principles of strategy pattern, - where a custom method can be used to augment the default - behavior of an algorithm. Here, this design is used to - decouple the filtering of tasks outputs (e.g., only extract - files with certain extension) from metadata traversal. - """ - - @abstractmethod - def filter(self, metadata, outputs): - """ - How to filter the output of a task. - - Note that the method is stateful; i.e., - it has references to both self and to - the instance of Metadata class that - invokes this method. - - :param metadata: `self` of the instance - of the Metadata class that calls this method. - - :param outputs: The values of a key in the - `outputs` field in a metadata file. e.g., - `metadata` in the following object is `a.vcf`: - 'outputs': {'merged': 'a.vcf'} - - :return: Filtered task outputs. - """ - pass - - -class Metadata: - """ - Implements utilities for traversing, processing, and - querying the resulting metadata (in JSON) of running - a workflow on Cromwell. - """ - def __init__(self, filename): - self.filename = filename - - @staticmethod - def _get_output_label(parent_workflow, workflow, output_var, shard_index): - """ - Composes a label for a task output. - :return: Some examples of constructed labels are: - - GATKSVPipelineSingleSample.Module00c.Module00c.PreprocessPESR.std_manta_vcf - - Module00c.PreprocessPESR.PreprocessPESR.StandardizeVCFs.std_vcf.0 - """ - return \ - ((parent_workflow + ".") if parent_workflow else "") + \ - f"{workflow}.{output_var}" + \ - (("." + str(shard_index)) if shard_index != -1 else "") - - @staticmethod - def _get_filtered_outputs(outputs): - return outputs - - def _traverse_outputs(self, calls, parent_workflow="", deep=False): - output_files = {} - - def update_output_files(outputs): - if run["executionStatus"] == "Done" and len(outputs) > 0: - output_files[self._get_output_label( - parent_workflow, workflow, out_label, - run["shardIndex"])] = outputs - - for workflow, runs in calls.items(): - for run in runs: - if "outputs" in run: - for out_label, out_files in run["outputs"].items(): - if not out_files: - continue - update_output_files(self._get_filtered_outputs(out_files)) - if deep and "subWorkflowMetadata" in run: - output_files.update( - self._traverse_outputs( - run["subWorkflowMetadata"]["calls"], - workflow, deep)) - return output_files - - def get_outputs(self, include_sub_workflows=False, filter_method=None): - """ - Iterates through a given cromwell metadata file - and filters the output files to be compared. 
- - :param include_sub_workflows: Boolean, if set to True, - output files generated in sub-workflows will be traversed. - - :param filter_method: A method to override the default - filter method. This method should be implement the - ITaskOutputFilters interface. Every traversed output of tasks - will be passed to this method, and this method's returned - value will be aggregated and returned. For instance, see - FilterBasedOnExtensions class for how the filter method can - be used to extract files with certain extension from the - metadata. - - :return: A dictionary with keys and values being a composite label - for tasks outputs and the values of the task output, respectively. - For instance (serialized to JSON and simplified for brevity): - { - "GATKSVPipelineSingleSample.FilterMelt.out":{ - "vcf.gz":["NA12878.melt.NA12878.vcf.gz"] - } - } - """ - if filter_method: - if not issubclass(type(filter_method.__self__), - ITaskOutputFilters): - raise TypeError(f"The class {type(filter_method.__self__)} " - f"should implement the interface " - f"{ITaskOutputFilters}.") - self._get_filtered_outputs = types.MethodType(filter_method, self) - - with open(self.filename, "r") as metadata_file: - metadata = json.load(metadata_file) - output_files = self._traverse_outputs( - metadata["calls"], - deep=include_sub_workflows) - return output_files diff --git a/wdl/AnnoRdPeSr.wdl b/wdl/AnnoRdPeSr.wdl deleted file mode 100644 index caeb5de28..000000000 --- a/wdl/AnnoRdPeSr.wdl +++ /dev/null @@ -1,125 +0,0 @@ -version 1.0 - -import "Structs.wdl" - -import "Duphold.wdl" as duphold -import "RdPeSrAnno.wdl" as rdpesr - -workflow AnnoRdPeSr { - input{ - - File pe_matrix - File pe_index - File sr_matrix - File sr_index - File rd_matrix - File rd_index - - File contig_list - - File bed - File bed_le_flank - File bed_ri_flank - String sample - String prefix - - String rdpesr_benchmark_docker - String sv_base_mini_docker - String sv_pipeline_docker - - RuntimeAttr? runtime_attr_rdpesr - } - - call RunRdPeSrAnnotation{ - input: - prefix = prefix, - bed = bed, - bed_le_flank = bed_le_flank, - bed_ri_flank = bed_ri_flank, - pe_matrix = pe_matrix, - pe_index = pe_index, - sr_matrix = sr_matrix, - sr_index = sr_index, - rd_matrix = rd_matrix, - rd_index = rd_index, - rdpesr_benchmark_docker = rdpesr_benchmark_docker, - runtime_attr_override = runtime_attr_rdpesr - } - - output{ - File PesrAnno = RunRdPeSrAnnotation.pesr_anno - File RdAnno = RunRdPeSrAnnotation.cov - File RdAnno_le = RunRdPeSrAnnotation.cov_le_flank - File RdAnno_ri = RunRdPeSrAnnotation.cov_ri_flank - } - } - -task RunRdPeSrAnnotation{ - input{ - String prefix - File bed - File bed_le_flank - File bed_ri_flank - File pe_matrix - File pe_index - File sr_matrix - File sr_index - File rd_matrix - File rd_index - String rdpesr_benchmark_docker - RuntimeAttr? 
runtime_attr_override - } - - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 15, - disk_gb: 20, - boot_disk_gb: 10, - preemptible_tries: 1, - max_retries: 1 - } - - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - - output { - File cov = "~{filebase}.bed.Rd.gz" - File cov_ri_flank = "~{filebase}.ri_flank.Rd.gz" - File cov_le_flank = "~{filebase}.le_flank.Rd.gz" - File pesr_anno = "~{filebase}.bed.PeSr.gz" - } - - String filebase = basename(bed,".bed") - - command <<< - - set -Eeuo pipefail - - - zcat ~{rd_matrix} | grep -v '@' | grep -v CONTIG |bgzip > bincov.tsv.gz - Rscript /src/bincov_to_normCov.R -i bincov.tsv.gz - bgzip normCov.tsv - tabix -b 2 -e 2 normCov.tsv.gz - - python3 /src/add_RD_to_SVs.py ~{bed} normCov.tsv.gz ~{filebase}.bed.Rd - python3 /src/add_RD_to_SVs.py ~{bed_le_flank} normCov.tsv.gz ~{filebase}.le_flank.Rd - python3 /src/add_RD_to_SVs.py ~{bed_ri_flank} normCov.tsv.gz ~{filebase}.ri_flank.Rd - python3 /src/add_SR_PE_to_PB_INS.V2.py ~{bed} ~{pe_matrix} ~{sr_matrix} ~{filebase}.bed.PeSr - - bgzip ~{filebase}.bed.Rd - bgzip ~{filebase}.ri_flank.Rd - bgzip ~{filebase}.le_flank.Rd - bgzip ~{filebase}.bed.PeSr - - >>> - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: rdpesr_benchmark_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - - diff --git a/wdl/AnnotateCleanVcfWithFilteringResults.wdl b/wdl/AnnotateCleanVcfWithFilteringResults.wdl deleted file mode 100644 index b66ac6e3d..000000000 --- a/wdl/AnnotateCleanVcfWithFilteringResults.wdl +++ /dev/null @@ -1,499 +0,0 @@ -version 1.0 - -#script to annotate cleanvcf with the results from downstream filterings - -import "Structs.wdl" - -workflow IntegrateFilteringResultsToCleanVcf { - input{ - File clean_vcf - File clean_vcf_idx - File minGQ_vcf - File minGQ_vcf_idx - File outlier_removal_vcf - File outlier_removal_vcf_idx - File batch_effect_vcf - File batch_effect_vcf_idx - File final_recali_vcf - File final_recali_vcf_idx - String prefix - String sv_pipeline_docker - RuntimeAttr? runtime_attr_anno_FinalRecali - RuntimeAttr? runtime_attr_anno_BatchEffect - RuntimeAttr? runtime_attr_anno_OutlierFilter - RuntimeAttr? runtime_attr_anno_minGQ - RuntimeAttr? 
runtime_attr_ConcatVcfs - } - - call AnnotateFinalRecali{ - input: - vcf = batch_effect_vcf, - filtered_vcf = final_recali_vcf, - sv_pipeline_docker = sv_pipeline_docker, - runtime_attr_override = runtime_attr_anno_FinalRecali - } - - call AnnotateBatchEffect{ - input: - vcf = outlier_removal_vcf, - filtered_vcf = AnnotateFinalRecali.output_vcf, - sv_pipeline_docker = sv_pipeline_docker, - runtime_attr_override = runtime_attr_anno_BatchEffect - } - - call AnnotateOutlierFilter{ - input: - vcf = minGQ_vcf, - filtered_vcf = AnnotateBatchEffect.output_vcf, - sv_pipeline_docker = sv_pipeline_docker, - runtime_attr_override = runtime_attr_anno_OutlierFilter - } - - call AnnotateMinGQ{ - input: - vcf = clean_vcf, - filtered_vcf = AnnotateOutlierFilter.output_vcf, - prefix = prefix, - sv_pipeline_docker = sv_pipeline_docker, - runtime_attr_override = runtime_attr_anno_minGQ - } - - output{ - File annotated_vcf = AnnotateMinGQ.output_vcf - File annotated_vcf_idx = AnnotateMinGQ.output_vcf_idx - } -} - -task AnnotateFinalRecali{ - input{ - File vcf - File filtered_vcf - String sv_pipeline_docker - RuntimeAttr? runtime_attr_override - } - - command <<< - - set -euo pipefail - - zcat ~{filtered_vcf} |fgrep -v "#" |cut -f3,7 > SVID_filter - - python <']) - - - fo=open("annotated.vcf",'w') - for i in header: - print(' '.join(i), file=fo) - - fin=os.popen(r'''zcat %s'''%("~{vcf}")) - for line in fin: - pin=line.strip().split() - if pin[0][:2]=='##': continue - elif pin[0][0]=='#': print('\t'.join(pin), file=fo) - else: - if pin[2] in svid_hash.keys(): - pin[6]=svid_hash[pin[2]] - else: - pin[6]='FAIL_FINAL_RECALIBRATION' - print('\t'.join(pin), file=fo) - fin.close() - fo.close() - CODE - - bgzip annotated.vcf - tabix annotated.vcf.gz - >>> - - output{ - File output_vcf = "annotated.vcf.gz" - File output_vcf_idx = "annotated.vcf.gz.tbi" - } - - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 3.75, - disk_gb: 50, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 0 - } - - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - -task AnnotateBatchEffect{ - input{ - File vcf - File filtered_vcf - String sv_pipeline_docker - RuntimeAttr? 
runtime_attr_override - } - - command <<< - - set -euo pipefail - - zcat ~{filtered_vcf} | grep -v "#" | cut -f3,7 > SVID_filter - - python <']) - - fo=open("annotated.vcf",'w') - for i in header: - print(' '.join(i), file=fo) - - fin=os.popen(r'''zcat %s'''%("~{vcf}")) - for line in fin: - pin=line.strip().split() - if pin[0][:2]=='##': continue - elif pin[0][0]=='#': print('\t'.join(pin), file=fo) - else: - if pin[2] in svid_hash.keys(): - pin[6]=svid_hash[pin[2]] - else: - pin[6]='FAIL_BATCH_EFFECT' - print('\t'.join(pin), file=fo) - fin.close() - fo.close() - CODE - - bgzip annotated.vcf - tabix annotated.vcf.gz - >>> - - output{ - File output_vcf = "annotated.vcf.gz" - File output_vcf_idx = "annotated.vcf.gz.tbi" - } - - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 3.75, - disk_gb: 50, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 0 - } - - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - -task AnnotateOutlierFilter{ - input{ - File vcf - File filtered_vcf - String sv_pipeline_docker - RuntimeAttr? runtime_attr_override - } - - command <<< - - set -euo pipefail - - zcat ~{filtered_vcf} | grep -v "#" | cut -f3,7 > SVID_filter - - python <']) - - fo=open("annotated.vcf",'w') - for i in header: - print(' '.join(i), file=fo) - - fin=os.popen(r'''zcat %s'''%("~{vcf}")) - for line in fin: - pin=line.strip().split() - if pin[0][:2]=='##': continue - elif pin[0][0]=='#': print('\t'.join(pin), file=fo) - else: - if pin[2] in svid_hash.keys(): - pin[6]=svid_hash[pin[2]] - else: - pin[6]='FAIL_OUTLIER_REMOVAL' - print('\t'.join(pin), file=fo) - fin.close() - fo.close() - CODE - - bgzip annotated.vcf - tabix annotated.vcf.gz - >>> - - output{ - File output_vcf = "annotated.vcf.gz" - File output_vcf_idx = "annotated.vcf.gz.tbi" - } - - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 3.75, - disk_gb: 50, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 0 - } - - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - -task AnnotateMinGQ{ - input{ - File vcf - File filtered_vcf - String prefix - String sv_pipeline_docker - RuntimeAttr? 
runtime_attr_override - } - - command <<< - - set -euo pipefail - - zcat ~{filtered_vcf} | grep -v "#" | cut -f3,7 > SVID_filter - - python <']) - - fo=open("annotated.vcf",'w') - for i in header: - print(' '.join(i), file=fo) - - fin=os.popen(r'''zcat %s'''%("~{vcf}")) - for line in fin: - pin=line.strip().split() - if pin[0][:2]=='##': continue - elif pin[0][0]=='#': print('\t'.join(pin), file=fo) - else: - if pin[2] in svid_hash.keys(): - pin[6]=svid_hash[pin[2]] - else: - pin[6]='FAIL_minGQ' - print('\t'.join(pin), file=fo) - fin.close() - fo.close() - CODE - - bgzip annotated.vcf - mv annotated.vcf.gz ~{prefix}.filter_annotated.vcf.gz - tabix ~{prefix}.filter_annotated.vcf.gz - >>> - - output{ - File output_vcf = "~{prefix}.filter_annotated.vcf.gz" - File output_vcf_idx = "~{prefix}.filter_annotated.vcf.gz.tbi" - } - - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 3.75, - disk_gb: 50, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 0 - } - - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - -task AnnotateWithFilterResults{ - input{ - File vcf - File filtered_vcf - String prefix - String new_header - String new_filter - String sv_pipeline_docker - RuntimeAttr? 
runtime_attr_override - } - - command <<< - - set -euo pipefail - - zcat ~{filtered_vcf} | grep -v "#" | cut -f3,7 > SVID_filter - - python <>> - - output{ - File output_vcf = "~{prefix}.filter_annotated.vcf.gz" - File output_vcf_idx = "~{prefix}.filter_annotated.vcf.gz.tbi" - } - - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 3.75, - disk_gb: 50, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 0 - } - - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} diff --git a/wdl/AnnotateILFeatures.wdl b/wdl/AnnotateILFeatures.wdl deleted file mode 100644 index 220d44f18..000000000 --- a/wdl/AnnotateILFeatures.wdl +++ /dev/null @@ -1,898 +0,0 @@ -version 1.0 - -import "Structs.wdl" -import "TasksMakeCohortVcf.wdl" as MiniTasks -import "TasksBenchmark.wdl" as tasks10 - -import "Duphold.wdl" as duphold -import "RdPeSrAnno.wdl" as rdpesr - -workflow AnnoILFeatures { - input{ - String prefix - String il_bam - String il_bam_bai - File vcf_file - - File pe_matrix - File pe_index - File sr_matrix - File sr_index - File rd_matrix - File rd_index - - File ref_SegDup - File ref_SimpRep - File ref_RepMask - - File ref_fasta - File ref_fai - File ref_dict - File contig_list - - Array[File] raw_vcfs - Array[String] raw_algorithms - - String rdpesr_benchmark_docker - String vapor_docker - String duphold_docker - String sv_base_mini_docker - String sv_pipeline_docker - - RuntimeAttr? runtime_attr_Vapor - RuntimeAttr? runtime_attr_duphold - RuntimeAttr? runtime_attr_rdpesr - RuntimeAttr? runtime_attr_bcf2vcf - RuntimeAttr? runtime_attr_LocalizeCram - RuntimeAttr? runtime_attr_vcf2bed - RuntimeAttr? runtime_attr_SplitVcf - RuntimeAttr? runtime_attr_ConcatBeds - RuntimeAttr? 
runtime_attr_ConcatVcfs - } - - call vcf2bed{ - input: - vcf = vcf_file, - prefix = prefix, - sv_pipeline_docker = sv_pipeline_docker, - runtime_attr_override = runtime_attr_vcf2bed - } - - call RunGenomicContextAnnotation{ - input: - bed = vcf2bed.bed, - prefix = prefix, - ref_SegDup = ref_SegDup, - ref_SimpRep = ref_SimpRep, - ref_RepMask = ref_RepMask, - rdpesr_benchmark_docker = rdpesr_benchmark_docker, - runtime_attr_override = runtime_attr_rdpesr - } - - call Bed2QueryAndRef{ - input: - bed = vcf2bed.bed, - sv_base_mini_docker = sv_base_mini_docker - } - - call ExtracGTGQ{ - input: - prefix = prefix, - vcf_file = vcf_file, - sv_pipeline_docker = sv_pipeline_docker - } - - scatter (i in range(length(raw_vcfs))){ - - call vcf2bed as vcf2bed_raw{ - input: - vcf = raw_vcfs[i], - prefix = "${prefix}.${raw_algorithms[i]}", - sv_pipeline_docker = sv_pipeline_docker, - runtime_attr_override = runtime_attr_vcf2bed - } - - - call Bed2QueryAndRef as Bed2QueryAndRef_Raw{ - input: - bed = vcf2bed_raw.bed, - sv_base_mini_docker = sv_base_mini_docker - } - - call BedComparison as BedComparison_vs_raw{ - input: - query = Bed2QueryAndRef_Raw.query, - ref = Bed2QueryAndRef.ref, - prefix = "${prefix}.vs.${raw_algorithms[i]}", - sv_pipeline_docker=sv_pipeline_docker - } - } - - call ExtracAlgorithmEvidenceFilter{ - input: - prefix = prefix, - vcf_file = vcf_file, - sv_pipeline_docker=sv_pipeline_docker - } - - call vcf2bed as vcf2bed_all{ - input: - vcf = vcf_file, - prefix = prefix, - sv_pipeline_docker = sv_pipeline_docker, - runtime_attr_override = runtime_attr_vcf2bed - } - - call RunRdPeSrAnnotation{ - input: - prefix = prefix, - bed = vcf2bed_all.bed, - pe_matrix = pe_matrix, - pe_index = pe_index, - sr_matrix = sr_matrix, - sr_index = sr_index, - rd_matrix = rd_matrix, - rd_index = rd_index, - ref_fasta = ref_fasta, - ref_fai = ref_fai, - ref_dict=ref_dict, - rdpesr_benchmark_docker = rdpesr_benchmark_docker, - runtime_attr_override = runtime_attr_rdpesr - } - - Array[String] contigs = transpose(read_tsv(contig_list))[0] - scatter ( contig in contigs ) { - call tasks10.SplitVcf as SplitVcf{ - input: - contig = contig, - vcf_file = vcf_file, - sv_pipeline_docker=sv_pipeline_docker, - runtime_attr_override=runtime_attr_SplitVcf - } - - call tasks10.LocalizeCramRequestPay as LocalizeCramIL{ - input: - contig = contig, - ref_fasta=ref_fasta, - ref_fai=ref_fai, - ref_dict=ref_dict, - project_id="talkowski-sv-gnomad", - bam_or_cram_file=il_bam, - bam_or_cram_index=il_bam_bai, - sv_pipeline_docker=sv_pipeline_docker, - runtime_attr_override=runtime_attr_LocalizeCram - } - - call ShiftVcfForDuphold{ - input: - prefix = contig, - vcf_file = SplitVcf.contig_vcf, - vcf_index = SplitVcf.contig_vcf_index, - ref_fai = ref_fai, - rdpesr_benchmark_docker = rdpesr_benchmark_docker, - runtime_attr_override = runtime_attr_duphold - } - - call RunDupholdPerContig as RunDupholdPerContigIL{ - input: - prefix = prefix, - contig = contig, - bam_or_cram_file=LocalizeCramIL.local_bam, - bam_or_cram_index=LocalizeCramIL.local_bai, - vcf_file = SplitVcf.contig_vcf, - vcf_index = SplitVcf.contig_vcf_index, - vcf_le_file = ShiftVcfForDuphold.le_flank, - vcf_le_index = ShiftVcfForDuphold.le_flank_index, - vcf_ri_file = ShiftVcfForDuphold.ri_flank, - vcf_ri_index = ShiftVcfForDuphold.ri_flank_index, - ref_fasta = ref_fasta, - ref_fai = ref_fai, - ref_dict = ref_dict, - rdpesr_benchmark_docker = duphold_docker, - runtime_attr_override = runtime_attr_duphold - } - - call Bcf2Vcf as Bcf2VcfIL{ - input: - prefix = prefix, - contig 
= contig, - bcf = RunDupholdPerContigIL.bcf, - sv_base_mini_docker = sv_base_mini_docker, - runtime_attr_override = runtime_attr_bcf2vcf - } - - call Bcf2Vcf as Bcf2VcfIL_le_flank{ - input: - prefix = prefix, - contig = contig, - bcf = RunDupholdPerContigIL.bcf_le, - sv_base_mini_docker = sv_base_mini_docker, - runtime_attr_override = runtime_attr_bcf2vcf - } - - call Bcf2Vcf as Bcf2VcfIL_ri_flank{ - input: - prefix = prefix, - contig = contig, - bcf = RunDupholdPerContigIL.bcf_ri, - sv_base_mini_docker = sv_base_mini_docker, - runtime_attr_override = runtime_attr_bcf2vcf - } - - } - - call MiniTasks.ConcatVcfs as ConcatVcfsIL{ - input: - vcfs=Bcf2VcfIL.vcf, - outfile_prefix="~{prefix}.IL", - sv_base_mini_docker=sv_base_mini_docker, - runtime_attr_override=runtime_attr_ConcatVcfs - } - - call MiniTasks.ConcatVcfs as ConcatVcfsIL_le_flank{ - input: - vcfs=Bcf2VcfIL_le_flank.vcf, - outfile_prefix="~{prefix}.IL_le_flank", - sv_base_mini_docker=sv_base_mini_docker, - runtime_attr_override=runtime_attr_ConcatVcfs - } - - call MiniTasks.ConcatVcfs as ConcatVcfsIL_ri_flank{ - input: - vcfs=Bcf2VcfIL_ri_flank.vcf, - outfile_prefix="~{prefix}.IL_ri_flank", - sv_base_mini_docker=sv_base_mini_docker, - runtime_attr_override=runtime_attr_ConcatVcfs - } - - output{ - File duphold_vcf_il = ConcatVcfsIL.concat_vcf - File duphold_vcf_il_le = ConcatVcfsIL_le_flank.concat_vcf - File duphold_vcf_il_ri = ConcatVcfsIL_ri_flank.concat_vcf - - File PesrAnno = RunRdPeSrAnnotation.pesr_anno - File RdAnno = RunRdPeSrAnnotation.cov - File RdAnno_le = RunRdPeSrAnnotation.cov_le_flank - File RdAnno_ri = RunRdPeSrAnnotation.cov_ri_flank - - File GCAnno = RunGenomicContextAnnotation.anno_bed - File GTGQ = ExtracGTGQ.GQ_GT - File vcf_info = ExtracAlgorithmEvidenceFilter.vcf_info - Array[File] vs_raw = BedComparison_vs_raw.comparison - } - } - -task RunDupholdPerContig{ - input{ - String prefix - String contig - File bam_or_cram_file - File bam_or_cram_index - File vcf_file - File vcf_index - File vcf_le_file - File vcf_le_index - File vcf_ri_file - File vcf_ri_index - File ref_fasta - File ref_fai - File ref_dict - String rdpesr_benchmark_docker - RuntimeAttr? 
runtime_attr_override - } - - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 3.75, - disk_gb: 10, - boot_disk_gb: 10, - preemptible_tries: 1, - max_retries: 1 - } - - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - - - output { - File bcf = "~{prefix}.~{contig}.bcf" - File bcf_le = "~{prefix}.~{contig}.le_flank.bcf" - File bcf_ri = "~{prefix}.~{contig}.ri_flank.bcf" - } - command <<< - - set -Eeuo pipefail - - duphold -t 4 \ - -v ~{vcf_file} \ - -b ~{bam_or_cram_file} \ - -f ~{ref_fasta} \ - -o ~{prefix}.~{contig}.bcf - - duphold -t 4 \ - -v ~{vcf_le_file} \ - -b ~{bam_or_cram_file} \ - -f ~{ref_fasta} \ - -o ~{prefix}.~{contig}.le_flank.bcf - - duphold -t 4 \ - -v ~{vcf_ri_file} \ - -b ~{bam_or_cram_file} \ - -f ~{ref_fasta} \ - -o ~{prefix}.~{contig}.ri_flank.bcf - - >>> - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: rdpesr_benchmark_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - -task vcf2bed{ - input{ - String prefix - File vcf - File? vcf_index - String sv_pipeline_docker - RuntimeAttr? runtime_attr_override - } - - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 3.75, - disk_gb: 10, - boot_disk_gb: 2, - preemptible_tries: 1, - max_retries: 1 - } - - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - String filename = basename(vcf, ".vcf.gz") - - output { - File bed = "~{prefix}.bed" - } - - command <<< - - set -Eeuo pipefail - - gsutil cp ~{vcf} ./tmp.vcf.gz - tabix -p vcf ./tmp.vcf.gz - svtk vcf2bed -i SVTYPE -i SVLEN tmp.vcf.gz ~{prefix}.bed - - >>> - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - -task Bed2QueryAndRef{ - input{ - File bed - String sv_base_mini_docker - RuntimeAttr? 
runtime_attr_override - } - - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 7.5, - disk_gb: 10, - boot_disk_gb: 10, - preemptible_tries: 1, - max_retries: 1 - } - - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - - output{ - File query = "${filebase}.query.gz" - File ref = "${filebase}.ref.gz" - } - - String filebase=basename(bed,".bed") - command <<< - echo "#chroms tart end name SVTYPE SVLEN" | sed -e 's/ /\t/g' > ~{filebase}.query - echo "#chrom start end VID svtype length AF samples" | sed -e 's/ /\t/g' > ~{filebase}.ref - - cut -f1-4,7,8 ~{bed} | grep -v "#" >> ~{filebase}.query - cut -f1-4,7,8 ~{bed} | sed -e "s/$/\t0\t~{filebase}/" | grep -v "#" >> ~{filebase}.ref - - bgzip ~{filebase}.query - bgzip ~{filebase}.ref - - >>> - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_base_mini_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - -task BedComparison{ - input{ - File query - File ref - String prefix - String sv_pipeline_docker - RuntimeAttr? runtime_attr_override - } - - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 3.75, - disk_gb: 10, - boot_disk_gb: 10, - preemptible_tries: 1, - max_retries: 1 - } - - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - - output{ - File comparison = "~{prefix}.bed" - } - - command <<< - bash /opt/sv-pipeline/scripts/vcf_qc/compare_callsets_V2.sh \ - -O ~{prefix}.bed -p ~{prefix} ~{query} ~{ref} - >>> - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - -task Bcf2Vcf{ - input{ - String prefix - String contig - File bcf - String sv_base_mini_docker - RuntimeAttr? 
runtime_attr_override - } - - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 3.75, - disk_gb: 10, - boot_disk_gb: 10, - preemptible_tries: 1, - max_retries: 1 - } - - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - - output{ - File vcf = "~{prefix}.~{contig}.duphold.vcf.gz" - } - - command <<< - set -Eeuo pipefail - bcftools view ~{bcf} | bgzip > ~{prefix}.~{contig}.duphold.vcf.gz - >>> - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_base_mini_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - -task ShiftVcfForDuphold{ - input{ - String prefix - File vcf_file - File vcf_index - File ref_fai - String rdpesr_benchmark_docker - RuntimeAttr? runtime_attr_override - } - - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 7.5, - disk_gb: 10, - boot_disk_gb: 10, - preemptible_tries: 1, - max_retries: 1 - } - - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - - output{ - File le_flank = "~{prefix}.le_flank.vcf.gz" - File ri_flank = "~{prefix}.ri_flank.vcf.gz" - File le_flank_index = "~{prefix}.le_flank.vcf.gz.tbi" - File ri_flank_index = "~{prefix}.ri_flank.vcf.gz.tbi" - } - - command <<< - python3 /src/Modify_vcf_by_steps.py ~{vcf_file} ~{prefix}.ri_flank.vcf -s 1000 -c ~{ref_fai} - python3 /src/Modify_vcf_by_steps.py ~{vcf_file} ~{prefix}.le_flank.vcf -s -1000 -c ~{ref_fai} - - bgzip ~{prefix}.ri_flank.vcf - bgzip ~{prefix}.le_flank.vcf - - tabix -p vcf ~{prefix}.ri_flank.vcf.gz - tabix -p vcf ~{prefix}.le_flank.vcf.gz - >>> - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: rdpesr_benchmark_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - -task RunDuphold{ - input{ - String prefix - File bam_or_cram_file - File bam_or_cram_index - File vcf_file - File ref_fasta - File ref_fai - File ref_dict - String rdpesr_benchmark_docker - RuntimeAttr? 
runtime_attr_override - } - - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 3.75, - disk_gb: 10, - boot_disk_gb: 2, - preemptible_tries: 1, - max_retries: 1 - } - - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - - output{ - File bcf = "~{prefix}.bcf" - } - - command <<< - - set -Eeuo pipefail - - duphold -t 4 \ - -v ~{vcf_file} \ - -b ~{bam_or_cram_file} \ - -f ~{ref_fasta} \ - -o ~{prefix}.bcf - >>> - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: rdpesr_benchmark_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - -task RunVaPoR{ - input{ - String prefix - String contig - File bam_or_cram_file - File bam_or_cram_index - File bed - File ref_fasta - File ref_fai - File ref_dict - String rdpesr_benchmark_docker - RuntimeAttr? runtime_attr_override - } - - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 3.75, - disk_gb: 10, - boot_disk_gb: 10, - preemptible_tries: 1, - max_retries: 1 - } - - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - - output { - File vapor = "~{bed}.vapor" - File vapor_plot = "~{prefix}.~{contig}.tar.gz" - } - - command <<< - - set -Eeuo pipefail - - mkdir ~{prefix}.~{contig} - - vapor bed \ - --sv-input ~{bed} \ - --output-path ~{prefix}.~{contig} \ - --reference ~{ref_fasta} \ - --pacbio-input ~{bam_or_cram_index} \ - - tar -czf ~{prefix}.~{contig}.tar.gz ~{prefix}.~{contig} - >>> - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: rdpesr_benchmark_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - -task RunRdPeSrAnnotation{ - input{ - String prefix - File bed - File pe_matrix - File pe_index - File sr_matrix - File sr_index - File rd_matrix - File rd_index - File ref_fasta - File ref_fai - File ref_dict - String rdpesr_benchmark_docker - RuntimeAttr? 
runtime_attr_override - } - - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 15, - disk_gb: 20, - boot_disk_gb: 10, - preemptible_tries: 1, - max_retries: 1 - } - - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - - output { - File cov = "~{filebase}.bed.Rd.gz" - File cov_ri_flank = "~{filebase}.ri_flank.Rd.gz" - File cov_le_flank = "~{filebase}.le_flank.Rd.gz" - File pesr_anno = "~{filebase}.bed.PeSr.gz" - } - - String filebase = basename(bed,".bed") - - command <<< - - set -Eeuo pipefail - - Rscript /src/modify_bed_for_PE_SR_RD_labeling.R \ - -i ~{bed} \ - --le_bp ~{bed}.le_bp \ - --ri_bp ~{bed}.ri_bp \ - --le_flank ~{bed}.le_flank \ - --ri_flank ~{bed}.ri_flank - - zcat ~{rd_matrix} | grep -v '@' | grep -v CONTIG |bgzip > bincov.tsv.gz - Rscript /src/bincov_to_normCov.R -i bincov.tsv.gz - bgzip normCov.tsv - tabix -b 2 -e 2 normCov.tsv.gz - - python3 /src/add_RD_to_SVs.py ~{bed} normCov.tsv.gz ~{filebase}.bed.Rd - python3 /src/add_RD_to_SVs.py ~{bed}.ri_flank normCov.tsv.gz ~{filebase}.ri_flank.Rd - python3 /src/add_RD_to_SVs.py ~{bed}.le_flank normCov.tsv.gz ~{filebase}.le_flank.Rd - python3 /src/add_SR_PE_to_PB_INS.V2.py ~{bed} ~{pe_matrix} ~{sr_matrix} ~{filebase}.bed.PeSr - - bgzip ~{filebase}.bed.Rd - bgzip ~{filebase}.ri_flank.Rd - bgzip ~{filebase}.le_flank.Rd - bgzip ~{filebase}.bed.PeSr - - >>> - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: rdpesr_benchmark_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - -task RunGenomicContextAnnotation{ - input{ - File bed - File ref_SegDup - File ref_SimpRep - File ref_RepMask - String prefix - String rdpesr_benchmark_docker - RuntimeAttr? 
runtime_attr_override - } - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 10, - disk_gb: 20, - boot_disk_gb: 10, - preemptible_tries: 1, - max_retries: 1 - } - - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - - command <<< - - awk '{print $1,$2,$2,$4,$5}' ~{bed} | sed -e 's/ /\t/g' > ~{prefix}.le_bp - awk '{print $1,$3,$3,$4,$5}' ~{bed} | sed -e 's/ /\t/g' > ~{prefix}.ri_bp - bedtools coverage -a ~{prefix}.le_bp -b ~{ref_RepMask} | awk '{if ($9>0) print}'> ~{prefix}.le_bp.vs.RM - bedtools coverage -a ~{prefix}.le_bp -b ~{ref_SegDup} | awk '{if ($9>0) print}'> ~{prefix}.le_bp.vs.SD - bedtools coverage -a ~{prefix}.le_bp -b ~{ref_SimpRep} | awk '{if ($9>0) print}'> ~{prefix}.le_bp.vs.SR - bedtools coverage -a ~{prefix}.ri_bp -b ~{ref_RepMask} | awk '{if ($9>0) print}'> ~{prefix}.ri_bp.vs.RM - bedtools coverage -a ~{prefix}.ri_bp -b ~{ref_SegDup} | awk '{if ($9>0) print}'> ~{prefix}.ri_bp.vs.SD - bedtools coverage -a ~{prefix}.ri_bp -b ~{ref_SimpRep} | awk '{if ($9>0) print}'> ~{prefix}.ri_bp.vs.SR - - - Rscript /src/add_GC_anno_to_bed.R \ - -b ~{bed} \ - -o ~{prefix}.GC_anno.bed \ - --left_vs_SR ~{prefix}.le_bp.vs.SR \ - --left_vs_SD ~{prefix}.le_bp.vs.SD \ - --left_vs_RM ~{prefix}.le_bp.vs.RM \ - --right_vs_SR ~{prefix}.ri_bp.vs.SR \ - --right_vs_SD ~{prefix}.ri_bp.vs.SD \ - --right_vs_RM ~{prefix}.ri_bp.vs.RM - >>> - - output{ - File anno_bed = "~{prefix}.GC_anno.bed" - } - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: rdpesr_benchmark_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - -task ExtracGTGQ{ - input{ - String prefix - File vcf_file - String sv_pipeline_docker - RuntimeAttr? runtime_attr_override - } - - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 3.75, - disk_gb: 10, - boot_disk_gb: 10, - preemptible_tries: 1, - max_retries: 1 - } - - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - - output{ - File GQ_GT = "~{prefix}.SVID_gt.tsv" - } - - command <<< - zcat ~{vcf_file} | grep -v '#' > ~{prefix}.SVID_gt - - python <>> - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - -task ExtracAlgorithmEvidenceFilter{ - input{ - String prefix - File vcf_file - String sv_pipeline_docker - RuntimeAttr? 
runtime_attr_override - } - - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 3.75, - disk_gb: 10, - boot_disk_gb: 10, - preemptible_tries: 1, - max_retries: 1 - } - - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - - output{ - File vcf_info = "~{prefix}.info" - } - - command <<< - zcat ~{vcf_file} | grep -v '##' | cut -f3,7 > ~{prefix}.SVID_filter - svtk vcf2bed -i SVTYPE -i SVLEN -i ALGORITHMS -i EVIDENCE ~{vcf_file} ~{prefix}.bed - paste <(cut -f4,7-10 ~{prefix}.bed) \ - <(cut -f2 ~{prefix}.SVID_filter) \ - > ~{prefix}.info - >>> - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} diff --git a/wdl/ApplyManualVariantFilter.wdl b/wdl/ApplyManualVariantFilter.wdl deleted file mode 100644 index 469a22b7c..000000000 --- a/wdl/ApplyManualVariantFilter.wdl +++ /dev/null @@ -1,85 +0,0 @@ -version 1.0 - -import "Structs.wdl" - -workflow ApplyManualVariantFilter { - input { - String prefix - File vcf - File? vcf_index - String filter_name - String bcftools_filter # supplied to bcftools view -e "" - - String sv_base_mini_docker - RuntimeAttr? runtime_attr_hard_filter_vcf - } - - call HardFilterVcf { - input: - prefix = prefix, - vcf = vcf, - vcf_index = vcf_index, - filter_name = filter_name, - bcftools_filter = bcftools_filter, - sv_base_mini_docker = sv_base_mini_docker, - runtime_attr_override = runtime_attr_hard_filter_vcf - } - - output { - File manual_filtered_vcf = HardFilterVcf.hard_filtered_vcf - File manual_filtered_vcf_index = HardFilterVcf.hard_filtered_vcf_index - } -} - - -task HardFilterVcf { - input { - String prefix - File vcf - File? vcf_index - String filter_name - String bcftools_filter - - String sv_base_mini_docker - RuntimeAttr? 
runtime_attr_override - } - - String hard_filtered_vcf_name = "~{prefix}.~{filter_name}.vcf.gz" - - # Disk must be scaled proportionally to the size of the VCF - Float input_size = size(vcf, "GiB") - RuntimeAttr default_attr = object { - mem_gb: 3.75, - disk_gb: ceil(10.0 + (input_size * 2)), - cpu_cores: 1, - preemptible_tries: 3, - max_retries: 1, - boot_disk_gb: 10 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - - command <<< - - set -euo pipefail - - bcftools view -e '~{bcftools_filter}' ~{vcf} -Oz -o "~{hard_filtered_vcf_name}" - - tabix "~{hard_filtered_vcf_name}" - - >>> - - output { - File hard_filtered_vcf = "~{hard_filtered_vcf_name}" - File hard_filtered_vcf_index = "~{hard_filtered_vcf_name}.tbi" - } - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_base_mini_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} diff --git a/wdl/CalcAF.wdl b/wdl/CalcAF.wdl deleted file mode 100644 index cbc124e2a..000000000 --- a/wdl/CalcAF.wdl +++ /dev/null @@ -1,180 +0,0 @@ -version 1.0 - -import "Structs.wdl" -import "CleanVcf5.wdl" as cleanvcf5 -import "TasksMakeCohortVcf.wdl" as tmc - -workflow CalcAF { - input { - File vcf - File vcf_idx - Int sv_per_shard - String prefix - String sv_pipeline_docker - File? sample_pop_assignments #Two-column file with sample ID & pop assignment. "." for pop will ignore sample - File? famfile #Used for M/F AF calculations - File? par_bed #Used for marking hemizygous males on X & Y - File? allosomes_list #allosomes .fai used to override default sex chromosome assignments - String? drop_empty_records - - RuntimeAttr? runtime_attr_compute_shard_af - RuntimeAttr? runtime_attr_scatter_vcf - RuntimeAttr? runtime_attr_combine_sharded_vcfs - } - - - # Tabix to chromosome of interest, and shard input VCF for stats collection - call tmc.ScatterVcf { - input: - vcf=vcf, - prefix=prefix, - sv_pipeline_docker=sv_pipeline_docker, - records_per_shard=sv_per_shard, - runtime_attr_override = runtime_attr_scatter_vcf - } - - # Scatter over VCF shards - scatter ( shard in ScatterVcf.shards ) { - # Collect AF summary stats - call ComputeShardAFs { - input: - vcf=shard, - sv_pipeline_docker=sv_pipeline_docker, - prefix=prefix, - sample_pop_assignments=sample_pop_assignments, - famfile=famfile, - par_bed=par_bed, - allosomes_list=allosomes_list, - runtime_attr_override = runtime_attr_compute_shard_af - } - } - - # Merge shards into single VCF - call CombineShardedVcfs { - input: - vcfs=ComputeShardAFs.shard_wAFs, - sv_pipeline_docker=sv_pipeline_docker, - prefix=prefix, - drop_empty_records=drop_empty_records, - runtime_attr_override = runtime_attr_combine_sharded_vcfs - } - - # Final output - output { - File vcf_wAFs = CombineShardedVcfs.vcf_out - File vcf_wAFs_idx = CombineShardedVcfs.vcf_out_idx - } -} - -# Subset a vcf to a single chromosome, and add global AF information (no subpop) -task ComputeShardAFs { - input { - File vcf - String prefix - String sv_pipeline_docker - File? sample_pop_assignments - File? famfile - File? par_bed - File? allosomes_list - RuntimeAttr? 
runtime_attr_override - } - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 1.5, - disk_gb: ceil(20 + size(vcf, "GB") * 2), - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - - command <<< - set -euo pipefail - optionals=" " - if [ ~{default="SKIP" sample_pop_assignments} != "SKIP" ]; then - optionals="$( echo "$optionals" ) -p ~{sample_pop_assignments}" - fi - if [ ~{default="SKIP" famfile} != "SKIP" ]; then - optionals="$( echo "$optionals" ) -f ~{famfile}" - fi - if [ ~{default="SKIP" par_bed} != "SKIP" ]; then - optionals="$( echo "$optionals" ) --par ~{par_bed}" - fi - if [ ~{default="SKIP" allosomes_list} != "SKIP" ]; then - optionals="$( echo "$optionals" ) --allosomes-list ~{allosomes_list}" - fi - echo -e "OPTIONALS INTERPRETED AS: $optionals" - echo -e "NOW RUNNING: /opt/sv-pipeline/05_annotation/scripts/compute_AFs.py $( echo "$optionals" ) ~{vcf} stdout" - #Tabix chromosome of interest & compute AN, AC, and AF - /opt/sv-pipeline/05_annotation/scripts/compute_AFs.py $optionals "~{vcf}" stdout \ - | bgzip -c \ - > "~{prefix}.wAFs.vcf.gz" - >>> - - output { - File shard_wAFs = "~{prefix}.wAFs.vcf.gz" - } - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - - -# Merge VCF shards & drop records with zero remaining non-ref alleles -task CombineShardedVcfs { - input { - Array[File] vcfs - String prefix - String sv_pipeline_docker - String? drop_empty_records - RuntimeAttr? 
runtime_attr_override - } - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 4, - disk_gb: 50, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - - command { - set -euo pipefail - vcf-concat -f ~{write_lines(vcfs)} \ - | vcf-sort \ - > merged.vcf - if [ ~{default="TRUE" drop_empty_records} == "TRUE" ]; then - /opt/sv-pipeline/05_annotation/scripts/prune_allref_records.py \ - merged.vcf stdout \ - | bgzip -c \ - > "~{prefix}.wAFs.vcf.gz" - else - cat merged.vcf | bgzip -c > "~{prefix}.wAFs.vcf.gz" - fi - tabix -p vcf "~{prefix}.wAFs.vcf.gz" - } - - - output { - File vcf_out = "~{prefix}.wAFs.vcf.gz" - File vcf_out_idx = "~{prefix}.wAFs.vcf.gz.tbi" - } - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - diff --git a/wdl/CombineRegeno.wdl b/wdl/CombineRegeno.wdl deleted file mode 100644 index 72bf8f3bb..000000000 --- a/wdl/CombineRegeno.wdl +++ /dev/null @@ -1,58 +0,0 @@ -version 1.0 - -import "Structs.wdl" - -workflow MergeCohortVcfs { - input { - Array[File] beds # Filtered depth VCFs across batches - String sv_base_mini_docker - RuntimeAttr? runtime_attr_merge_list - } - - call MergeList { - input: - regeno_beds = beds, - prefix = "master_regeno", - sv_base_mini_docker = sv_base_mini_docker, - runtime_attr_override = runtime_attr_merge_list - } - - output { - File regeno = MergeList.master_regeno - } -} - -task MergeList{ - input{ - String prefix - Array[File] regeno_beds - String sv_base_mini_docker - RuntimeAttr? 
runtime_attr_override - } - - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 3.75, - disk_gb: 10, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - - command{ - cat ~{sep=' ' regeno_beds} |sort -k1,1V -k2,2n -k3,3n > ~{prefix}.bed - } - output{ - File master_regeno="master_regeno.bed" - } - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_base_mini_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} diff --git a/wdl/ConcatTextFiles.wdl b/wdl/ConcatTextFiles.wdl deleted file mode 100644 index 1a8755e4c..000000000 --- a/wdl/ConcatTextFiles.wdl +++ /dev/null @@ -1,43 +0,0 @@ -version 1.0 - -import "TasksMakeCohortVcf.wdl" as tasks - -workflow ConcatTextFiles { - - input { - Array[File] text_files - String output_prefix - String output_suffix = "concat.txt" - - Boolean gzipped = false - Boolean headered = false - - String linux_docker - String sv_base_mini_docker - } - - if (!headered) { - # Disable filter command since input might be compressed - call tasks.CatUncompressedFiles { - input: - shards=text_files, - outfile_name="~{output_prefix}.~{output_suffix}", - filter_command="", - sv_base_mini_docker=sv_base_mini_docker - } - } - - if (headered) { - call tasks.ConcatHeaderedTextFiles { - input: - text_files=text_files, - gzipped=gzipped, - output_filename="~{output_prefix}.~{output_suffix}", - linux_docker=linux_docker - } - } - - output { - File concatenated_files = select_first([ConcatHeaderedTextFiles.out, CatUncompressedFiles.outfile]) - } -} diff --git a/wdl/CramToBam.ReviseBase.wdl b/wdl/CramToBam.ReviseBase.wdl deleted file mode 100644 index e8a137f58..000000000 --- a/wdl/CramToBam.ReviseBase.wdl +++ /dev/null @@ -1,247 +0,0 @@ -version 1.0 - -import "Structs.wdl" - -workflow CramToBamReviseBase { - input { - File cram_file - File? cram_index # required if cram is requester pays - File reference_fasta - File? reference_index - File contiglist - String samtools_cloud_docker - RuntimeAttr? runtime_attr_split_cram - RuntimeAttr? runtime_attr_revise_base - RuntimeAttr? 
runtime_attr_concat_bam - } - - Array[String] contigs = transpose(read_tsv(contiglist))[0] - String base_name = basename(cram_file, ".cram") - - scatter (contig in contigs) { - call SplitCramPerContig { - input: - cram_file = cram_file, - contig = contig, - reference_fasta = reference_fasta, - reference_index = reference_index, - samtools_cloud_docker = samtools_cloud_docker, - runtime_attr_override = runtime_attr_split_cram - } - call ReviseBaseInBam { - input: - bam_file = SplitCramPerContig.bam_file, - bam_index = SplitCramPerContig.bam_index, - reference_fasta = reference_fasta, - reference_index = reference_index, - samtools_cloud_docker = samtools_cloud_docker, - runtime_attr_override = runtime_attr_revise_base - } - } - - call ConcatBam { - input: - prefix = base_name, - bam_files = ReviseBaseInBam.revised_bam_file, - bam_indexes = ReviseBaseInBam.revised_bam_index, - samtools_cloud_docker = samtools_cloud_docker, - runtime_attr_override = runtime_attr_concat_bam - } - - output { - File bam_file = ConcatBam.bam_file - File bam_index = ConcatBam.bam_index - } -} - -task SplitCramPerContig { - input { - File cram_file - File reference_fasta - File? reference_index - String contig - String samtools_cloud_docker - RuntimeAttr? runtime_attr_override - } - - parameter_meta { - cram_file: { - localization_optional: true - } - } - - String bam_file_name = basename(cram_file, ".cram") - - File reference_index_file = select_first([reference_index, reference_fasta + ".fai"]) - - Int num_cpu = if defined(runtime_attr_override) then select_first([select_first([runtime_attr_override]).cpu_cores, 4]) else 4 - - Float cram_inflate_ratio = 3.5 - Float disk_overhead = 10.0 - Float cram_size = size(cram_file, "GiB") - Float bam_size = (cram_inflate_ratio * cram_size) / 10 - Float ref_size = size(reference_fasta, "GiB") - Float ref_index_size = size(reference_index_file, "GiB") - Int vm_disk_size = ceil(bam_size + ref_size + ref_index_size + disk_overhead) - - RuntimeAttr default_attr = object { - cpu_cores: num_cpu, - mem_gb: 1.5, - disk_gb: vm_disk_size, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - - output { - File bam_file = "~{bam_file_name}.~{contig}.bam" - File bam_index = "~{bam_file_name}.~{contig}.bam.bai" - } - command <<< - - set -Eeuo pipefail - - # necessary for getting permission to read from google bucket directly - export GCS_OAUTH_TOKEN=`gcloud auth application-default print-access-token` - - # covert cram to bam - samtools view \ - -b \ - -h \ - -@ ~{num_cpu} \ - -T "~{reference_fasta}" \ - -o "~{bam_file_name}.~{contig}.bam" \ - "~{cram_file}" \ - "~{contig}" - - # index bam file - samtools index -@ ~{num_cpu} "~{bam_file_name}.~{contig}.bam" - >>> - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: samtools_cloud_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - -task ReviseBaseInBam { - input { - File bam_file - File bam_index - File reference_fasta - File? reference_index - String samtools_cloud_docker - RuntimeAttr? 
runtime_attr_override - } - - String base_name = basename(bam_file, ".bam") - - Int num_cpu = if defined(runtime_attr_override) then select_first([select_first([runtime_attr_override]).cpu_cores, 4]) else 4 - - File reference_index_file = select_first([reference_index, reference_fasta + ".fai"]) - Float cram_inflate_ratio = 3.5 - Float disk_overhead = 30.0 - Float cram_size = size(bam_file, "GiB") - Float bam_size = cram_inflate_ratio * cram_size - Float ref_size = size(reference_fasta, "GiB") - Float ref_index_size = size(reference_index_file, "GiB") - Int vm_disk_size = ceil(cram_size + bam_size + ref_size + ref_index_size + disk_overhead) - - RuntimeAttr default_attr = object { - cpu_cores: num_cpu, - mem_gb: 1.5, - disk_gb: vm_disk_size, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - - command <<< - samtools view -H ~{bam_file} > ~{base_name}.revised.sam - - paste \ - <(samtools view ~{bam_file} | cut -f1-9) \ - <(samtools view ~{bam_file} | cut -f10 | sed -e "s/Y/N/g" | sed -e "s/R/N/g" | sed -e "s/W/N/g" | sed -e "s/S/N/g" | sed -e "s/K/N/g" | sed -e "s/M/N/g" | sed -e "s/D/N/g" | sed -e "s/H/N/g" | sed -e "s/V/N/g" | sed -e "s/B/N/g" | sed -e "s/X/N/g" ) \ - <(samtools view ~{bam_file} | cut -f11-) \ - >> ~{base_name}.revised.sam - - samtools view -Sb ~{base_name}.revised.sam -o ~{base_name}.revised.bam - - samtools index ~{base_name}.revised.bam - - >>> - - output{ - File revised_bam_file = "~{base_name}.revised.bam" - File revised_bam_index = "~{base_name}.revised.bam.bai" - } - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: samtools_cloud_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - -task ConcatBam { - input { - String prefix - Array[File] bam_files - Array[File] bam_indexes - String samtools_cloud_docker - RuntimeAttr? 
runtime_attr_override - } - - Int num_cpu = if defined(runtime_attr_override) then select_first([select_first([runtime_attr_override]).cpu_cores, 2]) else 2 - - Float disk_overhead = 10.0 - Float bam_size = size(bam_files, "GiB") - Float index_size = size(bam_indexes, "GiB") - Int vm_disk_size = 2 * ceil(bam_size + index_size + disk_overhead) - - RuntimeAttr default_attr = object { - cpu_cores: num_cpu, - mem_gb: 1.5, - disk_gb: vm_disk_size, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - - command <<< - samtools merge ~{prefix}.bam ~{sep=" " bam_files} - samtools index ~{prefix}.bam - >>> - - output{ - File bam_file = "~{prefix}.bam" - File bam_index = "~{prefix}.bam.bai" - } - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: samtools_cloud_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - - - - diff --git a/wdl/Duphold.wdl b/wdl/Duphold.wdl deleted file mode 100644 index 5bc34b78f..000000000 --- a/wdl/Duphold.wdl +++ /dev/null @@ -1,232 +0,0 @@ -version 1.0 - -import "Structs.wdl" -import "TasksMakeCohortVcf.wdl" as MiniTasks -import "TasksBenchmark.wdl" as tasks10 - -workflow Duphold { - input { - String prefix - String bam_or_cram_file - String bam_or_cram_index - File vcf_file - File ref_fasta - File ref_fai - File ref_dict - File contig_list - String duphold_docker - String sv_base_mini_docker - String sv_pipeline_docker - RuntimeAttr? runtime_attr_duphold - RuntimeAttr? runtime_attr_bcf2vcf - RuntimeAttr? runtime_attr_LocalizeCram - RuntimeAttr? runtime_attr_SplitVcf - RuntimeAttr? 
runtime_attr_ConcatVcfs - } - - Array[String] contigs = transpose(read_tsv(contig_list))[0] - scatter ( contig in contigs ) { - - call tasks10.LocalizeCram as LocalizeCram{ - input: - contig = contig, - ref_fasta=ref_fasta, - ref_fai=ref_fai, - ref_dict=ref_dict, - bam_or_cram_file=bam_or_cram_file, - bam_or_cram_index=bam_or_cram_index, - sv_pipeline_docker=sv_pipeline_docker, - runtime_attr_override=runtime_attr_LocalizeCram - } - - call tasks10.SplitVcf as SplitVcf{ - input: - contig = contig, - vcf_file = vcf_file, - sv_pipeline_docker=sv_pipeline_docker, - runtime_attr_override=runtime_attr_SplitVcf - } - - call RunDupholdPerContig{ - input: - prefix = prefix, - contig = contig, - bam_or_cram_file=LocalizeCram.local_bam, - bam_or_cram_index=LocalizeCram.local_bai, - vcf_file = SplitVcf.contig_vcf, - vcf_index = SplitVcf.contig_vcf_index, - ref_fasta = ref_fasta, - ref_fai = ref_fai, - ref_dict = ref_dict, - duphold_docker = duphold_docker, - runtime_attr_override = runtime_attr_duphold - } - - call Bcf2Vcf{ - input: - prefix = prefix, - contig = contig, - bcf = RunDupholdPerContig.bcf, - sv_base_mini_docker = sv_base_mini_docker, - runtime_attr_override = runtime_attr_bcf2vcf - } - } - - call MiniTasks.ConcatVcfs as ConcatVcfs{ - input: - vcfs=Bcf2Vcf.vcf, - outfile_prefix=prefix, - sv_base_mini_docker=sv_base_mini_docker, - runtime_attr_override=runtime_attr_ConcatVcfs - } - - output{ - File vcf = ConcatVcfs.concat_vcf - File vcf_idx = ConcatVcfs.concat_vcf_idx - } - } - -task RunDupholdPerContig{ - input{ - String prefix - String contig - File bam_or_cram_file - File bam_or_cram_index - File vcf_file - File vcf_index - File ref_fasta - File ref_fai - File ref_dict - String duphold_docker - RuntimeAttr? runtime_attr_override - } - - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 10, - disk_gb: 100, - boot_disk_gb: 10, - preemptible_tries: 0, - max_retries: 1 - } - - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - - - output { - File bcf = "~{prefix}.~{contig}.bcf" - } - command <<< - - set -Eeuo pipefail - - duphold -t 4 \ - -v ~{vcf_file} \ - -b ~{bam_or_cram_file} \ - -f ~{ref_fasta} \ - -o ~{prefix}.~{contig}.bcf - - >>> - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: duphold_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - -task Bcf2Vcf{ - input{ - String prefix - String contig - File bcf - String sv_base_mini_docker - RuntimeAttr? 
runtime_attr_override - } - - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 3.75, - disk_gb: 5, - boot_disk_gb: 5, - preemptible_tries: 0, - max_retries: 1 - } - - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - - output{ - File vcf = "~{prefix}.~{contig}.duphold.vcf.gz" - } - command <<< - set -Eeuo pipefail - bcftools view ~{bcf} | bgzip > ~{prefix}.~{contig}.duphold.vcf.gz - >>> - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_base_mini_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - -task RunDuphold{ - input{ - String prefix - File bam_or_cram_file - File bam_or_cram_index - File vcf_file - File ref_fasta - File ref_fai - File ref_dict - String duphold_docker - RuntimeAttr? runtime_attr_override - } - - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 10, - disk_gb: 100, - boot_disk_gb: 10, - preemptible_tries: 0, - max_retries: 1 - } - - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - - - output { - File bcf = "~{prefix}.bcf" - } - command <<< - - set -Eeuo pipefail - - duphold -t 4 \ - -v ~{vcf_file} \ - -b ~{bam_or_cram_file} \ - -f ~{ref_fasta} \ - -o ~{prefix}.bcf - - >>> - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: duphold_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - - - - - diff --git a/wdl/FilterBatchQc.wdl b/wdl/FilterBatchQc.wdl deleted file mode 100644 index c056f2629..000000000 --- a/wdl/FilterBatchQc.wdl +++ /dev/null @@ -1,148 +0,0 @@ -version 1.0 - -import "MainVcfQc.wdl" as vcf_qc -import "Utils.wdl" as util - -workflow FilterBatchQc { - input { - File? manta_vcf_noOutliers - File? melt_vcf_noOutliers - File? wham_vcf_noOutliers - File? depth_vcf_noOutliers - File? merged_pesr_vcf - - File? manta_vcf_noOutliers_index - File? melt_vcf_noOutliers_index - File? wham_vcf_noOutliers_index - File? depth_vcf_noOutliers_index - File? merged_pesr_vcf_index - - String batch - File ped_file - Array[Array[String]]? site_level_comparison_datasets # Array of two-element arrays, one per dataset, each of format [prefix, gs:// path to directory with one BED per population] - Array[Array[String]]? sample_level_comparison_datasets # Array of two-element arrays, one per dataset, each of format [prefix, gs:// path to per-sample tarballs] - File? sample_renaming_tsv # File with mapping to rename sample IDs for compatibility with sample_level_comparison_datasets - - File contig_list - Int? random_seed - Int? max_gq - - String sv_base_mini_docker - String sv_pipeline_docker - String sv_pipeline_qc_docker - - # overrides for local tasks - RuntimeAttr? runtime_override_plot_qc_vcf_wide - RuntimeAttr? 
runtime_override_site_level_benchmark_plot - RuntimeAttr? runtime_override_plot_qc_per_sample - RuntimeAttr? runtime_override_plot_qc_per_family - RuntimeAttr? runtime_override_per_sample_benchmark_plot - RuntimeAttr? runtime_override_sanitize_outputs - RuntimeAttr? runtime_attr_ids_from_vcf - RuntimeAttr? runtime_attr_subset_ped - - # overrides for MiniTasks - RuntimeAttr? runtime_override_subset_vcf - RuntimeAttr? runtime_override_merge_vcfwide_stat_shards - RuntimeAttr? runtime_override_merge_vcf_2_bed - - # overrides for CollectQcVcfWide - RuntimeAttr? runtime_override_preprocess_vcf - RuntimeAttr? runtime_override_collect_sharded_vcf_stats - RuntimeAttr? runtime_override_svtk_vcf_2_bed - RuntimeAttr? runtime_override_scatter_vcf - RuntimeAttr? runtime_override_merge_subvcf_stat_shards - - # overrides for CollectSiteLevelBenchmarking - RuntimeAttr? runtime_override_site_level_benchmark - RuntimeAttr? runtime_override_merge_site_level_benchmark - - # overrides for CollectQcPerSample - RuntimeAttr? runtime_override_collect_vids_per_sample - RuntimeAttr? runtime_override_split_samples_list - RuntimeAttr? runtime_override_tar_shard_vid_lists - RuntimeAttr? runtime_override_merge_sharded_per_sample_vid_lists - - # overrides for CollectPerSampleBenchmarking - RuntimeAttr? runtime_override_benchmark_samples - RuntimeAttr? runtime_override_split_shuffled_list - RuntimeAttr? runtime_override_merge_and_tar_shard_benchmarks - } - - Array[String] algorithms = ["manta", "melt", "wham", "depth", "pesr"] - Array[File?] vcfs_array = [manta_vcf_noOutliers, melt_vcf_noOutliers, wham_vcf_noOutliers, depth_vcf_noOutliers, merged_pesr_vcf] - Array[File?] vcf_indexes_array = [manta_vcf_noOutliers_index, melt_vcf_noOutliers_index, wham_vcf_noOutliers_index, depth_vcf_noOutliers_index, merged_pesr_vcf_index] - Int num_algorithms = length(algorithms) - - call util.GetSampleIdsFromVcf { - input: - vcf = select_first(vcfs_array), - sv_base_mini_docker = sv_base_mini_docker, - runtime_attr_override = runtime_attr_ids_from_vcf - } - - call util.SubsetPedFile { - input: - ped_file = ped_file, - sample_list = GetSampleIdsFromVcf.out_file, - subset_name = batch, - sv_base_mini_docker = sv_base_mini_docker, - runtime_attr_override = runtime_attr_subset_ped - } - - Int max_gq_ = select_first([max_gq, 999]) - - scatter (i in range(num_algorithms)) { - if (defined(vcfs_array[i]) && defined(vcf_indexes_array[i])) { - call vcf_qc.MainVcfQc as VcfQc { - input: - vcfs = [select_first([vcfs_array[i]])], - ped_file=SubsetPedFile.ped_subset_file, - prefix="${batch}.${algorithms[i]}_FilterBatch_filtered_vcf", - sv_per_shard=2500, - samples_per_shard=600, - site_level_comparison_datasets=site_level_comparison_datasets, - sample_level_comparison_datasets=sample_level_comparison_datasets, - sample_renaming_tsv=sample_renaming_tsv, - primary_contigs_fai=contig_list, - random_seed=random_seed, - max_gq=max_gq_, - sv_base_mini_docker=sv_base_mini_docker, - sv_pipeline_docker=sv_pipeline_docker, - sv_pipeline_qc_docker=sv_pipeline_qc_docker, - runtime_override_subset_vcf=runtime_override_subset_vcf, - runtime_override_preprocess_vcf=runtime_override_preprocess_vcf, - runtime_override_plot_qc_vcf_wide=runtime_override_plot_qc_vcf_wide, - runtime_override_site_level_benchmark_plot=runtime_override_site_level_benchmark_plot, - runtime_override_per_sample_benchmark_plot=runtime_override_per_sample_benchmark_plot, - runtime_override_plot_qc_per_sample=runtime_override_plot_qc_per_sample, - 
runtime_override_plot_qc_per_family=runtime_override_plot_qc_per_family, - runtime_override_sanitize_outputs=runtime_override_sanitize_outputs, - runtime_override_merge_vcfwide_stat_shards=runtime_override_merge_vcfwide_stat_shards, - runtime_override_merge_vcf_2_bed=runtime_override_merge_vcf_2_bed, - runtime_override_collect_sharded_vcf_stats=runtime_override_collect_sharded_vcf_stats, - runtime_override_svtk_vcf_2_bed=runtime_override_svtk_vcf_2_bed, - runtime_override_scatter_vcf=runtime_override_scatter_vcf, - runtime_override_merge_subvcf_stat_shards=runtime_override_merge_subvcf_stat_shards, - runtime_override_site_level_benchmark=runtime_override_site_level_benchmark, - runtime_override_merge_site_level_benchmark=runtime_override_merge_site_level_benchmark, - runtime_override_collect_vids_per_sample=runtime_override_collect_vids_per_sample, - runtime_override_split_samples_list=runtime_override_split_samples_list, - runtime_override_tar_shard_vid_lists=runtime_override_tar_shard_vid_lists, - runtime_override_benchmark_samples=runtime_override_benchmark_samples, - runtime_override_split_shuffled_list=runtime_override_split_shuffled_list, - runtime_override_merge_sharded_per_sample_vid_lists=runtime_override_merge_sharded_per_sample_vid_lists, - runtime_override_merge_and_tar_shard_benchmarks=runtime_override_merge_and_tar_shard_benchmarks - } - } - } - - output { - File? filtered_manta_vcf_qc = VcfQc.sv_vcf_qc_output[0] - File? filtered_melt_vcf_qc = VcfQc.sv_vcf_qc_output[1] - File? filtered_wham_vcf_qc = VcfQc.sv_vcf_qc_output[2] - File? filtered_depth_vcf_qc = VcfQc.sv_vcf_qc_output[3] - File? filtered_pesr_vcf_qc = VcfQc.sv_vcf_qc_output[4] - } - -} diff --git a/wdl/FilterCleanupQualRecalibration.wdl b/wdl/FilterCleanupQualRecalibration.wdl deleted file mode 100644 index f9bafa74e..000000000 --- a/wdl/FilterCleanupQualRecalibration.wdl +++ /dev/null @@ -1,241 +0,0 @@ -version 1.0 - -import "Structs.wdl" -import "TasksMakeCohortVcf.wdl" as MiniTasks - - -workflow FilterCleanupQualRecalibration { - input{ - File vcf - File vcf_idx - File? pcrplus_samples_list - File famfile - Float min_callrate_global - Float min_callrate_smallDels - File contiglist - String prefix - String sv_pipeline_docker - RuntimeAttr? 
runtime_attr_ConcatVcfs - } - Array[Array[String]] contigs = read_tsv(contiglist) - - call RemoveMCNVs{ - input: - vcf = vcf, - vcf_idx = vcf_idx, - sv_pipeline_docker=sv_pipeline_docker - } - - scatter ( contig in contigs ) { - call Cleanup { - input: - vcf=RemoveMCNVs.no_mcnv_vcf, - vcf_idx=RemoveMCNVs.no_mcnv_idx, - contig=contig[0], - pcrplus_samples_list=pcrplus_samples_list, - famfile=famfile, - min_callrate_global=min_callrate_global, - min_callrate_smallDels=min_callrate_smallDels, - prefix=prefix, - sv_pipeline_docker=sv_pipeline_docker - } - } - - call MiniTasks.ConcatVcfs as ConcatVcfs { - input: - vcfs=Cleanup.out_vcf, - outfile_prefix="~{prefix}.cleaned_filters_qual_recalibrated", - naive=true, - sv_base_mini_docker=sv_pipeline_docker, - runtime_attr_override=runtime_attr_ConcatVcfs - } - - call MiniTasks.ConcatVcfs as MergeMCNV { - input: - vcfs= [ConcatVcfs.concat_vcf, RemoveMCNVs.mcnv_vcf], - vcfs_idx = [ConcatVcfs.concat_vcf_idx,RemoveMCNVs.mcnv_idx], - allow_overlaps = true, - outfile_prefix = "~{prefix}.cleaned_filters_qual_recali", - sv_base_mini_docker=sv_pipeline_docker, - runtime_attr_override=runtime_attr_ConcatVcfs - } - - output { - File cleaned_vcf = MergeMCNV.concat_vcf - File cleaned_vcf_idx = MergeMCNV.concat_vcf_idx - } -} - -#remove mCNV from the vcf, which will be added back to the output: - -task RemoveMCNVs{ - input{ - File vcf - File vcf_idx - String sv_pipeline_docker - RuntimeAttr? runtime_attr_override - } - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 3.75, - disk_gb: 50, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - command <<< - set -euo pipefail - zcat ~{vcf} | awk '{if ($7!="MULTIALLELIC") print}' | bgzip > no_MCNV.vcf.gz - tabix no_MCNV.vcf.gz - zcat ~{vcf} | grep '#' > MCNV.vcf - zcat ~{vcf} | awk '{if ($7=="MULTIALLELIC") print}' >> MCNV.vcf - bgzip MCNV.vcf - tabix MCNV.vcf.gz - >>> - - output{ - File mcnv_vcf = "MCNV.vcf.gz" - File mcnv_idx = "MCNV.vcf.gz.tbi" - File no_mcnv_vcf = "no_MCNV.vcf.gz" - File no_mcnv_idx = "no_MCNV.vcf.gz.tbi" - } - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - -task MergeMCNV{ - input{ - File vcf - File vcf_idx - File mcnv - File mcnv_idx - String prefix - String sv_pipeline_docker - RuntimeAttr? 
runtime_attr_override - } - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 3.75, - disk_gb: 50, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - command <<< - set -euo pipefail - #zcat ~{mcnv} |uniq | bgzip > mcnv.vcf.gz - #tabix mcnv.vcf.gz - vcf-concat ~{vcf} ~{mcnv} | vcf-sort | bgzip > ~{prefix}.cleaned_filters_qual_recali.vcf.gz - tabix ~{prefix}.cleaned_filters_qual_recali.vcf.gz - >>> - - output{ - File with_mcnv_vcf = "~{prefix}.cleaned_filters_qual_recali.vcf.gz" - File with_mcnv_idx = "~{prefix}.cleaned_filters_qual_recali.vcf.gz.tbi" - } - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - -# Applies filters & cleanup to VCF for a single chromosome -task Cleanup { - input{ - File vcf - File vcf_idx - String contig - File? pcrplus_samples_list - File famfile - Float min_callrate_global - Float min_callrate_smallDels - String prefix - String sv_pipeline_docker - RuntimeAttr? runtime_attr_override - } - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 3.75, - disk_gb: 50, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 0 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - command <<< - - set -euo pipefail - #Subset to chromosome of interest - tabix -h ~{vcf} ~{contig} | bgzip -c > input.vcf.gz - #Get list of PCR- samples - tabix -H ~{vcf} | fgrep -v "##" | cut -f10- | sed 's/\t/\n/g' \ - > all.samples.list - if [ ! 
-z "~{pcrplus_samples_list}" ];then - fgrep -wvf ~{pcrplus_samples_list} all.samples.list \ - > pcrminus.samples.list - else - cp all.samples.list pcrminus.samples.list - fi - #Restrict famfiles - #while read ptn; do fgrep -w $ptn ~{famfile}; done < all.samples.list > revised.fam - awk -F "\t" 'NR==FNR{c[$1]++;next};c[$2] > 0' all.samples.list ~{famfile} > revised.pre - awk 'NR==FNR{o[FNR]=$1; next} {t[$2]=$0} END{for(x=1; x<=FNR; x++){y=o[x]; print t[y]}}' all.samples.list revised.pre > revised.fam - fgrep -wf pcrminus.samples.list revised.fam > revised.pcrminus.fam - #Compute fraction of missing genotypes per variant - zcat input.vcf.gz \ - | awk '{ if ($7 !~ /MULTIALLELIC/) print $0 }' \ - | bgzip -c \ - > input.noMCNV.vcf.gz - plink2 \ - --missing variant-only \ - --max-alleles 2 \ - --keep-fam revised.pcrminus.fam \ - --fam revised.fam \ - --vcf input.noMCNV.vcf.gz - fgrep -v "#" plink2.vmiss \ - | awk -v OFS="\t" '{ print $2, 1-$NF }' \ - > callrates.txt - #Clean up VCF - /opt/sv-pipeline/scripts/downstream_analysis_and_filtering/filter_cleanup_and_QUAL_recalibration.PCRMinus_only.py \ - --callrate-table callrates.txt \ - --min-callrate-global ~{min_callrate_global} \ - --min-callrate-smallDels ~{min_callrate_smallDels} \ - input.vcf.gz \ - stdout \ - | bgzip -c \ - > "~{prefix}.~{contig}.cleaned_filters_qual_recalibrated.vcf.gz" - # tabix -p vcf -f "~{prefix}.cleaned_filters_qual_recalibrated.vcf.gz" - >>> - - output { - File out_vcf = "~{prefix}.~{contig}.cleaned_filters_qual_recalibrated.vcf.gz" - # File out_vcf_idx = "~{prefix}.cleaned_filters_qual_recalibrated.vcf.gz.tbi" - } - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - diff --git a/wdl/FilterOutlierSamplesPostMinGQ.wdl b/wdl/FilterOutlierSamplesPostMinGQ.wdl deleted file mode 100644 index b3e6a9eab..000000000 --- a/wdl/FilterOutlierSamplesPostMinGQ.wdl +++ /dev/null @@ -1,404 +0,0 @@ -version 1.0 - -import "Structs.wdl" - -# This is an analysis WDL to identify & filter outliers from VCFs -# after minGQ filtering at the end of the Talkowski SV pipeline - -# Treats PCR+ and PCR- samples separately - -workflow FilterOutlierSamplesPostMinGQ { - input{ - File vcf - File vcf_idx - File? pcrplus_samples_list - Int? 
n_iqr_cutoff_pcrplus - Int n_iqr_cutoff_pcrminus - String prefix - File autosomes_list - String sv_pipeline_docker - } - Array[Array[String]] contigs=read_tsv(autosomes_list) - Boolean PCRPLUS = defined(pcrplus_samples_list) - - # Write original list of unfiltered samples and split by PCR status - call WriteSamplesList { - input: - vcf=vcf, - vcf_idx=vcf_idx, - pcrplus_samples_list=pcrplus_samples_list, - prefix=prefix, - sv_pipeline_docker=sv_pipeline_docker - } - - # Get count of biallelic autosomal variants per sample - scatter ( contig in contigs ) { - call CountSvtypes { - input: - vcf=vcf, - vcf_idx=vcf_idx, - prefix=prefix, - contig=contig[0], - sv_pipeline_docker=sv_pipeline_docker - } - } - call CombineCounts { - input: - svcounts=CountSvtypes.sv_counts, - prefix=prefix, - sv_pipeline_docker=sv_pipeline_docker - } - - # Get outliers - if (PCRPLUS) { - call IdentifyOutliers as identify_PCRPLUS_outliers { - input: - svcounts=CombineCounts.summed_svcounts, - n_iqr_cutoff=select_first([n_iqr_cutoff_pcrplus]), - samples_list=WriteSamplesList.plus_samples_list, - prefix="~{prefix}.PCRPLUS", - sv_pipeline_docker=sv_pipeline_docker - } - } - call IdentifyOutliers as identify_PCRMINUS_outliers { - input: - svcounts=CombineCounts.summed_svcounts, - n_iqr_cutoff=n_iqr_cutoff_pcrminus, - samples_list=WriteSamplesList.minus_samples_list, - prefix="~{prefix}.PCRMINUS", - sv_pipeline_docker=sv_pipeline_docker - } - - # Exclude outliers from vcf - call ExcludeOutliers { - input: - vcf=vcf, - vcf_idx=vcf_idx, - plus_outliers_list=identify_PCRPLUS_outliers.outliers_list, - minus_outliers_list=identify_PCRMINUS_outliers.outliers_list, - outfile="~{prefix}.outliers_removed.vcf.gz", - prefix=prefix, - sv_pipeline_docker=sv_pipeline_docker - } - - # Write new list of samples without outliers - call FilterSampleList { - input: - original_samples_list=WriteSamplesList.samples_list, - outlier_samples=ExcludeOutliers.merged_outliers_list, - prefix=prefix, - sv_pipeline_docker=sv_pipeline_docker - } - - # Final outputs - output { - File vcf_noOutliers = ExcludeOutliers.vcf_no_outliers - File vcf_noOutliers_idx = ExcludeOutliers.vcf_no_outliers_idx - File nooutliers_samples_list = FilterSampleList.filtered_samples_list - File excluded_samples_list = ExcludeOutliers.merged_outliers_list - File svcounts_per_sample_data = CombineCounts.summed_svcounts - File? svcounts_per_sample_plots_PCRPLUS = identify_PCRPLUS_outliers.svcount_distrib_plots - File svcounts_per_sample_plots_PCRMINUS = identify_PCRMINUS_outliers.svcount_distrib_plots - } -} - - -# Write original list of samples -task WriteSamplesList { - input{ - File vcf - File vcf_idx - File? pcrplus_samples_list - String prefix - String sv_pipeline_docker - RuntimeAttr? runtime_attr_override - } - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 3.75, - disk_gb: 50, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - command <<< - set -euo pipefail - tabix -H ~{vcf} | fgrep -v "##" \ - | cut -f10- | sed 's/\t/\n/g' > "~{prefix}.samples.list" - if [ ! 
-z "~{pcrplus_samples_list}" ];then - fgrep -wf ~{pcrplus_samples_list} "~{prefix}.samples.list" \ - > "~{prefix}.PCRPLUS.samples.list" || true - fgrep -wvf ~{pcrplus_samples_list} "~{prefix}.samples.list" \ - > "~{prefix}.PCRMINUS.samples.list" || true - else - cp ~{prefix}.samples.list "~{prefix}.PCRMINUS.samples.list" - touch "~{prefix}.PCRPLUS.samples.list" - fi - >>> - - output { - File samples_list = "~{prefix}.samples.list" - File plus_samples_list = "~{prefix}.PCRPLUS.samples.list" - File minus_samples_list = "~{prefix}.PCRMINUS.samples.list" - } - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - - -# Count biallelic SV per sample for a single chromosome -task CountSvtypes { - input{ - File vcf - File vcf_idx - String prefix - String contig - String sv_pipeline_docker - RuntimeAttr? runtime_attr_override - } - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 3.75, - disk_gb: 50, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - command <<< - set -euo pipefail - tabix --print-header "~{vcf}" "~{contig}" \ - | fgrep -v "MULTIALLELIC" \ - | fgrep -v "PESR_GT_OVERDISPERSION" \ - | svtk count-svtypes --no-header stdin \ - | awk -v OFS="\t" -v chr="~{contig}" '{ print $0, chr }' \ - > "~{prefix}.~{contig}.svcounts.txt" - >>> - - output { - File sv_counts = "~{prefix}.~{contig}.svcounts.txt" - } - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - - -# Combine SV count files across chromosomes -task CombineCounts { - input{ - Array[File] svcounts - String prefix - String sv_pipeline_docker - RuntimeAttr? 
runtime_attr_override - } - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 3.75, - disk_gb: 30, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - - command <<< - set -euo pipefail - while read file; do - cat "$file" - done < ~{write_lines(svcounts)} \ - > merged_svcounts.txt - /opt/sv-pipeline/scripts/downstream_analysis_and_filtering/sum_svcounts_perSample.R \ - merged_svcounts.txt \ - "~{prefix}.summed_svcounts_per_sample.txt" - >>> - - - output { - File summed_svcounts = "~{prefix}.summed_svcounts_per_sample.txt" - } - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - - -# Identify the list of outlier samples & generate distribution plots -task IdentifyOutliers { - input{ - File svcounts - Int n_iqr_cutoff - File samples_list - String prefix - String sv_pipeline_docker - RuntimeAttr? runtime_attr_override - } - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 3.75, - disk_gb: 20, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - - command <<< - # Subset input data to specified samples - sed -n '1p' ~{svcounts} > filtered_counts.input.txt - sed '1d' ~{svcounts} | fgrep -wf ~{samples_list} >> filtered_counts.input.txt - # Return list of samples exceeding cutoff for at least one sv class - /opt/sv-pipeline/scripts/downstream_analysis_and_filtering/determine_svcount_outliers.R \ - -p "~{prefix}" \ - -I "~{n_iqr_cutoff}" \ - filtered_counts.input.txt \ - "~{prefix}_svcount_outlier_plots/" - cat "~{prefix}_svcount_outlier_plots/~{prefix}.SV_count_outlier_samples.txt" \ - | fgrep -v "#" | cut -f1 | sort | uniq \ - > "~{prefix}.SV_count_outliers.samples.list" - tar -cvzf "~{prefix}_svcount_outlier_plots.tar.gz" "~{prefix}_svcount_outlier_plots/" - >>> - - output { - File outliers_list = "~{prefix}.SV_count_outliers.samples.list" - File svcount_distrib_plots = "~{prefix}_svcount_outlier_plots.tar.gz" - } - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - - -# Exclude outliers from VCF -task ExcludeOutliers { - input{ - File vcf - File vcf_idx - File? plus_outliers_list - File minus_outliers_list - String outfile - String prefix - String sv_pipeline_docker - RuntimeAttr? 
runtime_attr_override - } - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 3.75, - disk_gb: 100, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - - - command <<< - set -euo pipefail - cat ~{plus_outliers_list} ~{minus_outliers_list} \ - | sort -Vk1,1 | uniq \ - > "~{prefix}.SV_count_outliers.samples.list" || true - tabix -H ~{vcf} | fgrep -v "##" | \ - sed 's/\t/\n/g' | awk -v OFS="\t" '{ print $1, NR }' | \ - fgrep -wf "~{prefix}.SV_count_outliers.samples.list" | cut -f2 > \ - indexes_to_exclude.txt || true - if [ $( cat indexes_to_exclude.txt | wc -l ) -gt 0 ]; then - zcat ~{vcf} | \ - cut --complement -f$( cat indexes_to_exclude.txt | paste -s -d, ) | \ - bgzip -c \ - > "~{prefix}.subsetted_preEmptyRemoval.vcf.gz" || true - /opt/sv-pipeline/scripts/drop_empty_records.py \ - "~{prefix}.subsetted_preEmptyRemoval.vcf.gz" \ - stdout | \ - bgzip -c > ~{outfile} || true - else - cp ~{vcf} ~{outfile} - fi - tabix -p vcf -f "~{outfile}" - >>> - - output { - File merged_outliers_list = "~{prefix}.SV_count_outliers.samples.list" - File vcf_no_outliers = "~{outfile}" - File vcf_no_outliers_idx = "~{outfile}.tbi" - } - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - - -# Write new list of samples per prefix after outlier filtering -task FilterSampleList { - input{ - File original_samples_list - File outlier_samples - String prefix - String sv_pipeline_docker - RuntimeAttr? 
runtime_attr_override - } - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 3.75, - disk_gb: 10, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - - command <<< - set -euo pipefail - fgrep -wvf ~{outlier_samples} ~{original_samples_list} > \ - ~{prefix}.outliers_excluded.samples.list || true - >>> - - output { - File filtered_samples_list = "~{prefix}.outliers_excluded.samples.list" - } - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} diff --git a/wdl/GatherSampleEvidence.wdl b/wdl/GatherSampleEvidence.wdl index a5079bbf6..0c7eeb531 100644 --- a/wdl/GatherSampleEvidence.wdl +++ b/wdl/GatherSampleEvidence.wdl @@ -3,7 +3,6 @@ version 1.0 import "Structs.wdl" import "CollectCoverage.wdl" as cov import "CollectSVEvidence.wdl" as coev -import "CramToBam.ReviseBase.wdl" as rb import "Manta.wdl" as manta import "MELT.wdl" as melt import "Scramble.wdl" as scramble @@ -35,10 +34,6 @@ workflow GatherSampleEvidence { # input files, and all subsequent operations will run on the deep copy of the input file. Boolean move_bam_or_cram_files = false - # Convert ambiguous bases (e.g. K, S, Y, etc.) to N - # Only use if encountering errors (expensive!) - Boolean revise_base = false - # Localize reads parameters # set to true on default, skips localize_reads if set to false Boolean run_localize_reads = true @@ -123,7 +118,6 @@ workflow GatherSampleEvidence { # Runtime configuration overrides RuntimeAttr? runtime_attr_localize_reads RuntimeAttr? runtime_attr_split_cram - RuntimeAttr? runtime_attr_revise_base RuntimeAttr? runtime_attr_concat_bam RuntimeAttr? runtime_attr_manta RuntimeAttr? 
runtime_attr_melt_coverage @@ -165,23 +159,8 @@ workflow GatherSampleEvidence { } } - if (revise_base) { - call rb.CramToBamReviseBase { - input: - cram_file = select_first([LocalizeReads.output_file, bam_or_cram_file]), - cram_index = select_first([LocalizeReads.output_index, bam_or_cram_index]), - reference_fasta = reference_fasta, - reference_index = reference_index, - contiglist = select_first([primary_contigs_fai]), - samtools_cloud_docker = samtools_cloud_docker, - runtime_attr_split_cram = runtime_attr_split_cram, - runtime_attr_revise_base = runtime_attr_revise_base, - runtime_attr_concat_bam = runtime_attr_concat_bam - } - } - - File reads_file_ = select_first([CramToBamReviseBase.bam_file, LocalizeReads.output_file, bam_or_cram_file]) - File reads_index_ = select_first([CramToBamReviseBase.bam_index, LocalizeReads.output_index, bam_or_cram_index_]) + File reads_file_ = select_first([LocalizeReads.output_file, bam_or_cram_file]) + File reads_index_ = select_first([LocalizeReads.output_index, bam_or_cram_index_]) if (collect_coverage || run_melt || run_scramble) { call cov.CollectCounts { diff --git a/wdl/Genotype_3.wdl b/wdl/Genotype_3.wdl deleted file mode 100644 index ffcd3782d..000000000 --- a/wdl/Genotype_3.wdl +++ /dev/null @@ -1,71 +0,0 @@ -version 1.0 -import "Structs.wdl" -workflow Regeno{ - input{ - File depth_vcf - String batch - File regeno_vcf - File regeno_variants - String sv_pipeline_docker - } - call ConcatRegenotypedVcfs{ - input: - batch=batch, - depth_vcf=depth_vcf, - regeno_vcf=regeno_vcf, - sv_pipeline_docker=sv_pipeline_docker, - regeno_variants=regeno_variants - } - output { - File genotyped_vcf = ConcatRegenotypedVcfs.genotyped_vcf - File genotyped_vcf_idx = ConcatRegenotypedVcfs.genotyped_vcf_idx - } -} - -task ConcatRegenotypedVcfs { - input{ - String batch - File regeno_variants - File depth_vcf - File regeno_vcf - String sv_pipeline_docker - RuntimeAttr? 
runtime_attr_override - } - - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 16, - disk_gb: 10, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - command <<< - set -euo pipefail - zcat ~{regeno_vcf} |fgrep "#" > head.txt - zcat ~{regeno_vcf} |fgrep -f ~{regeno_variants} >body.txt - cat head.txt body.txt|bgzip -c > regeno.vcf.gz - zcat ~{depth_vcf} |fgrep -f ~{regeno_variants} -v |bgzip -c > no_variant.vcf.gz - vcf-concat regeno.vcf.gz no_variant.vcf.gz \ - | vcf-sort -c \ - | bgzip -c > ~{batch}.depth.regeno_final.vcf.gz - tabix ~{batch}.depth.regeno_final.vcf.gz - >>> - output { - File genotyped_vcf = "~{batch}.depth.regeno_final.vcf.gz" - File genotyped_vcf_idx = "~{batch}.depth.regeno_final.vcf.gz.tbi" - } - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - - diff --git a/wdl/GetSampleID.wdl b/wdl/GetSampleID.wdl deleted file mode 100644 index 86ee0c8d9..000000000 --- a/wdl/GetSampleID.wdl +++ /dev/null @@ -1,159 +0,0 @@ -version 1.0 - -# Gets sample ID from a BAM/CRAM file and also generates a version of the ID that is safe for use in the pipeline - -workflow GetSampleID { - input { - File bam_or_cram_file - - # Use only for crams in requester pays buckets - Boolean requester_pays_crams = false - - Int sample_id_hash_length = 6 - - # Docker - String sv_pipeline_docker - String samtools_cloud_docker - } - - if (requester_pays_crams) { - call GetBamIDRequesterPays { - input: - bam_or_cram_file = bam_or_cram_file, - samtools_cloud_docker = samtools_cloud_docker - } - } - - if (!requester_pays_crams) { - call GetBamID { - input: - bam_or_cram_file = bam_or_cram_file, - samtools_cloud_docker = samtools_cloud_docker - } - } - - String bam_id_ = select_first([GetBamIDRequesterPays.out, GetBamID.out]) - call InternalSampleID { - input: - external_sample_id = bam_id_, - bam_or_cram_path = bam_or_cram_file, - hash_length = sample_id_hash_length, - sv_pipeline_docker = sv_pipeline_docker - } - - output { - String bam_id = bam_id_ - String safe_id = InternalSampleID.out - } -} - -task GetBamID { - input { - File bam_or_cram_file - String samtools_cloud_docker - } - - parameter_meta { - bam_or_cram_file: { - localization_optional: true - } - } - - output { - String out = read_lines("sample.txt")[0] - } - command <<< - set -euo pipefail - export GCS_OAUTH_TOKEN=`gcloud auth application-default print-access-token` - samtools view -H ~{bam_or_cram_file} \ - | grep "^@RG" \ - | awk -F "\t" '{for(i=2;i<=NF;i++){if($i~/^SM:/){a=$i}} print substr(a,4)}' \ - | sort \ - | uniq \ - > sample.txt - NUM_IDS=$(wc -l < sample.txt) - if [[ ${NUM_IDS} -eq 0 ]]; then - echo "No sample IDs were found in the BAM header" - exit 1 - fi - if [[ ${NUM_IDS} -gt 1 ]]; then - echo "Multiple sample IDs were found in the BAM header" - exit 1 - fi - >>> - runtime { - docker: samtools_cloud_docker - memory: "1 GB" - cpu: "1" - disks: "local-disk 10 HDD" - preemptible: "3" - maxRetries: "1" - } -} - -task GetBamIDRequesterPays 
{ - input { - File bam_or_cram_file - String samtools_cloud_docker - } - - Int disk_size_gb = 10 + ceil(size(bam_or_cram_file, "GB")) - - output { - String out = read_lines("sample.txt")[0] - } - command <<< - set -euo pipefail - export GCS_OAUTH_TOKEN=`gcloud auth application-default print-access-token` - samtools view -H ~{bam_or_cram_file} \ - | grep "^@RG" \ - | awk -F "\t" '{for(i=2;i<=NF;i++){if($i~/^SM:/){a=$i}} print substr(a,4)}' \ - | sort \ - | uniq \ - > sample.txt - NUM_IDS=$(wc -l < sample.txt) - if [[ ${NUM_IDS} -eq 0 ]]; then - echo "No sample IDs were found in the BAM header" - exit 1 - fi - if [[ ${NUM_IDS} -gt 1 ]]; then - echo "Multiple sample IDs were found in the BAM header" - exit 1 - fi - >>> - runtime { - docker: samtools_cloud_docker - memory: "1 GB" - cpu: "1" - disks: "local-disk ~{disk_size_gb} HDD" - preemptible: "3" - maxRetries: "1" - } -} - -task InternalSampleID { - input { - String external_sample_id - String bam_or_cram_path - Int hash_length - String sv_pipeline_docker - } - - output { - String out = read_lines("external_id.txt")[0] - } - command <<< - set -euo pipefail - HASH=$(echo -n "~{external_sample_id}~{bam_or_cram_path}" | openssl sha1 | awk '{print substr($2,0,~{hash_length})}') - SAFE_ID=$(echo -n "~{external_sample_id}" | sed 's/[^a-zA-Z0-9]/_/g') - echo "__${SAFE_ID}__${HASH}" > external_id.txt - >>> - runtime { - docker: sv_pipeline_docker - memory: "1 GB" - cpu: "1" - disks: "local-disk 10 HDD" - preemptible: "3" - maxRetries: "1" - } -} diff --git a/wdl/IGVGeneratePlotsAllSamples.wdl b/wdl/IGVGeneratePlotsAllSamples.wdl deleted file mode 100644 index 345e2457a..000000000 --- a/wdl/IGVGeneratePlotsAllSamples.wdl +++ /dev/null @@ -1,184 +0,0 @@ -version 1.0 - -import "IGVGeneratePlotsWholeGenome.wdl" as igv -import "Structs.wdl" - -workflow IGV_all_samples { - input { - Array[String] samples - Array[String] crams - Array[String] crams_idx - File varfile - File Fasta - File Fasta_dict - File Fasta_idx - String prefix - String igv_docker - String sv_base_mini_docker - RuntimeAttr? runtime_attr_override - } - - scatter (i in range(length(samples))){ - call generate_per_sample_bed{ - input: - varfile = varfile, - sample_id = samples[i], - sv_base_mini_docker=sv_base_mini_docker, - runtime_attr_override=runtime_attr_override - } - - call igv.IGV_denovo as IGV_denovo { - input: - varfile=generate_per_sample_bed.per_sample_varfile, - sample = samples[i], - Cram_file = crams[i], - Cram_file_idx = crams_idx[i], - Fasta = Fasta, - Fasta_idx = Fasta_idx, - Fasta_dict = Fasta_dict, - prefix = prefix, - igv_docker = igv_docker, - sv_base_mini_docker=sv_base_mini_docker - - } - - } - call integrate_figure{ - input: - pe_tar_gz = IGV_denovo.tar_gz_pe, - prefix = prefix, - sv_base_mini_docker = sv_base_mini_docker - } - - output{ - File tar_gz_pe = integrate_figure.tar_gz_pe - } - } - - -task generate_per_sample_bed{ - input { - File varfile - String sample_id - String sv_base_mini_docker - RuntimeAttr? 
runtime_attr_override - } - RuntimeAttr default_attr=object { - cpu_cores: 1, - mem_gb: 1, - disk_gb: 10, - boot_disk_gb: 10, - preemptible_tries: 1, - max_retries: 1 - } - - String filename = basename(varfile, ".bed") - command <<< - set -euo pipefail - grep -w ~{sample_id} ~{varfile} | cut -f1-5 > ~{filename}.~{sample_id}.bed - >>> - - output{ - File per_sample_varfile= "~{filename}.~{sample_id}.bed" - } - - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_base_mini_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } - - } - -task tar_gz_output_folder{ - input{ - String prefix - Array[String] flag - String sv_base_mini_docker - RuntimeAttr? runtime_attr_override - } - - command <<< - tar cvf ~{prefix}.igv_plots.tar.gz ~{prefix}.igv_plots/ - >>> - - output{ - File plots_tar_gz = "~{prefix}.igv_plots.tar.gz" - } - - RuntimeAttr default_attr=object { - cpu_cores: 1, - mem_gb: 1, - disk_gb: 10, - boot_disk_gb: 10, - preemptible_tries: 1, - max_retries: 1 - } - - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_base_mini_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - -task bgzip_igv_folder{ - input{ - Array[File] pe_igv_plots - } - command <<< - while read file; do - tar -zxvf ${file} - done < ~{write_lines(pe_igv_plots)}; - - - tar -czf pe_screenshots.tar.gz pe_screenshot - >>> - runtime{ - docker: "talkowski/igv_gatk:latest" - preemptible: 3 - memory: "10 GB" - disks: "local-disk 50 HDD" - } - output{ - File tar_gz_pe='pe_screenshots.tar.gz' - } - } - -task integrate_figure{ - input{ - Array[File] pe_tar_gz - String prefix - String sv_base_mini_docker - } - command <<< - mkdir ~{prefix}_igv_plots/ - while read file; do - tar -zxvf ${file} - mv pe_screenshot/* ~{prefix}_igv_plots/ - done < ~{write_lines(pe_tar_gz)}; - - tar -czf ~{prefix}_igv_plots.tar.gz ~{prefix}_igv_plots - >>> - - runtime{ - docker: sv_base_mini_docker - preemptible: 3 - memory: "10 GB" - disks: "local-disk 50 HDD" - } - output{ - File tar_gz_pe = "~{prefix}_igv_plots.tar.gz" - } -} diff --git a/wdl/IGVGeneratePlotsWholeGenome.wdl b/wdl/IGVGeneratePlotsWholeGenome.wdl deleted file mode 100644 index 7207ac59a..000000000 --- a/wdl/IGVGeneratePlotsWholeGenome.wdl +++ /dev/null @@ -1,67 +0,0 @@ -version 1.0 - -import "Structs.wdl" - -workflow IGV_denovo { - input{ - File varfile - File Fasta - File Fasta_idx - File Fasta_dict - File Cram_file - File Cram_file_idx - String sample - String prefix - String igv_docker - String sv_base_mini_docker - } - call runIGV_whole_genome{ - input: - varfile=varfile, - 
fasta=Fasta, - fasta_idx=Fasta_idx, - sample =sample, - prefix=prefix, - local_cram=Cram_file, - local_crai=Cram_file_idx, - var_file = varfile, - igv_docker = igv_docker - } - - output{ - File tar_gz_pe = runIGV_whole_genome.pe_plots - } -} - -task runIGV_whole_genome{ - input{ - File varfile - File fasta - File fasta_idx - String sample - String prefix - File local_cram - File local_crai - File var_file - String igv_docker - } - command <<< - set -euo pipefail - python /src/makeigvsplit_cram.py ~{var_file} 500 ~{fasta} ~{local_cram} ~{sample} all - bash pe.sh - xvfb-run --server-args="-screen 0, 1920x3000x24" bash /IGV_2.4.14/igv.sh -b pe.txt - - tar -czf ~{prefix}.pe_screenshots.tar.gz pe_screenshot - >>> - runtime { - docker: igv_docker - preemptible: 3 - memory: "10 GB" - disks: "local-disk 50 HDD" - } - output{ - File pe_plots="~{prefix}.pe_screenshots.tar.gz" - File igv="pe.txt" - } - } - diff --git a/wdl/IGVTrioPlots.wdl b/wdl/IGVTrioPlots.wdl deleted file mode 100755 index 505ccdf97..000000000 --- a/wdl/IGVTrioPlots.wdl +++ /dev/null @@ -1,83 +0,0 @@ -version 1.0 - -import "Structs.wdl" - -workflow IGV_trio { - input{ - File varfile - File Fasta - File Fasta_idx - File Fasta_dict - String pb - String fa - String mo - File pb_cram - File pb_crai - File fa_cram - File fa_crai - File mo_cram - File mo_crai - String igv_docker - } - - call runIGV_whole_genome{ - input: - varfile = varfile, - fasta = Fasta, - fasta_dict = Fasta_dict, - fasta_idx = Fasta_idx, - fa = fa, - mo = mo, - pb = pb, - pb_cram = pb_cram, - pb_crai = pb_crai, - fa_cram = fa_cram, - fa_crai = fa_crai, - mo_cram = mo_cram, - mo_crai = mo_crai, - igv_docker = igv_docker - } - - output{ - File tar_gz_pe = runIGV_whole_genome.pe_plots - } -} - -task runIGV_whole_genome{ - input{ - File varfile - File fasta - File fasta_idx - File fasta_dict - String pb - String fa - String mo - File pb_cram - File pb_crai - File fa_cram - File fa_crai - File mo_cram - File mo_crai - String igv_docker - } - command <<< - set -euo pipefail - #export GCS_OAUTH_TOKEN=`gcloud auth application-default print-access-token` - python /src/makeigvpesr_trio.py ~{varfile} ~{fasta} ~{pb} ~{pb_cram},~{fa_cram},~{mo_cram} pe_igv_plots -b 500 - bash pe.sh - xvfb-run --server-args="-screen 0, 1920x540x24" bash /IGV_2.4.14/igv.sh -b pe.txt - tar -czf ~{pb}_pe_igv_plots.tar.gz pe_igv_plots - - >>> - runtime { - docker: igv_docker - preemptible: 1 - memory: "15 GB" - disks: "local-disk 100 HDD" - } - output{ - File pe_plots="~{pb}_pe_igv_plots.tar.gz" - File pe_txt = "pe.txt" - } - } - diff --git a/wdl/IGVTrioPlotsAllSamples.wdl b/wdl/IGVTrioPlotsAllSamples.wdl deleted file mode 100755 index 3041269ec..000000000 --- a/wdl/IGVTrioPlotsAllSamples.wdl +++ /dev/null @@ -1,147 +0,0 @@ -version 1.0 - -import "IGVTrioPlots.wdl" as igv -import "Structs.wdl" - -workflow IGV_all_samples { - input { - Array[String] pb_list - Array[String] fa_list - Array[String] mo_list - Array[File] pb_cram_list - Array[File] pb_crai_list - Array[File] fa_cram_list - Array[File] fa_crai_list - Array[File] mo_cram_list - Array[File] mo_crai_list - File varfile - File Fasta - File Fasta_dict - File Fasta_idx - String prefix - String sv_base_mini_docker - String igv_docker - RuntimeAttr? 
runtime_attr_override - } - - scatter (i in range(length(pb_list))){ - call generate_per_sample_bed{ - input: - varfile = varfile, - sample_id = pb_list[i], - sv_base_mini_docker=sv_base_mini_docker, - runtime_attr_override=runtime_attr_override - } - - call igv.IGV_trio as IGV_trio { - input: - varfile=generate_per_sample_bed.per_sample_varfile, - Fasta = Fasta, - Fasta_idx = Fasta_idx, - Fasta_dict = Fasta_dict, - pb=pb_list[i], - fa=fa_list[i], - mo=mo_list[i], - pb_cram=pb_cram_list[i], - fa_cram=fa_cram_list[i], - mo_cram=mo_cram_list[i], - pb_crai=pb_crai_list[i], - fa_crai=fa_crai_list[i], - mo_crai=mo_crai_list[i], - igv_docker = igv_docker - } - } - call integrate_igv_plots{ - input: - igv_tar = IGV_trio.tar_gz_pe, - prefix = prefix, - sv_base_mini_docker = sv_base_mini_docker - } - - output{ - File tar_gz_pe = integrate_igv_plots.plot_tar - } - } - - -task generate_per_sample_bed{ - input { - File varfile - String sample_id - String sv_base_mini_docker - RuntimeAttr? runtime_attr_override - } - RuntimeAttr default_attr=object { - cpu_cores: 1, - mem_gb: 1, - disk_gb: 10, - boot_disk_gb: 10, - preemptible_tries: 1, - max_retries: 1 - } - - String filename = basename(varfile, ".bed") - command <<< - set -euo pipefail - grep -w ~{sample_id} ~{varfile} | cut -f1-5 | awk '{print $1,$2,$3,$5,$4}' | sed -e 's/ /\t/g' > ~{filename}.~{sample_id}.bed - >>> - - output{ - File per_sample_varfile= "~{filename}.~{sample_id}.bed" - } - - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_base_mini_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } - - } - -task integrate_igv_plots{ - input { - Array[File] igv_tar - String prefix - String sv_base_mini_docker - RuntimeAttr? 
runtime_attr_override - } - RuntimeAttr default_attr=object { - cpu_cores: 1, - mem_gb: 1, - disk_gb: 10, - boot_disk_gb: 10, - preemptible_tries: 1, - max_retries: 1 - } - - command <<< - mkdir ~{prefix}_igv_plots - while read file; do - tar -zxf ${file} - mv pe_igv_plots/* ~{prefix}_igv_plots/ - done < ~{write_lines(igv_tar)}; - tar -czf ~{prefix}_igv_plots.tar.gz ~{prefix}_igv_plots - >>> - - output{ - File plot_tar = "~{prefix}_igv_plots.tar.gz" - } - - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_base_mini_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } - - } diff --git a/wdl/MinGQRocOpt.wdl b/wdl/MinGQRocOpt.wdl deleted file mode 100644 index d778036f3..000000000 --- a/wdl/MinGQRocOpt.wdl +++ /dev/null @@ -1,226 +0,0 @@ -version 1.0 - -import "Structs.wdl" - -workflow MinGQRocOpt { - input{ - File trio_tarball - String prefix - File trios_list - File conditions_table - Int maxSVperTrio - Float roc_max_fdr - Int roc_min_gq - Int roc_max_gq - Int roc_step_gq - Int min_sv_per_proband_per_condition - String sv_pipeline_docker - String sv_base_mini_docker - } - # Scatter over each condition and send the trio data for ROC optimization - Array[Array[String]] conditions = read_tsv(conditions_table) - scatter ( condition in conditions ) { - # Subset variants to condition of interest & merge across trios - # Also computes median & Q1/Q3 variants per proband - # If median > min_sv_per_proband_per_condition, also runs ROC - call FilterMergeVariantsWithROC as roc_single { - input: - trio_tarball=trio_tarball, - prefix="~{prefix}", - sv_pipeline_docker=sv_pipeline_docker, - trios_list=trios_list, - condition_id=condition[0], - minSVLEN=condition[1], - maxSVLEN=condition[2], - minAF=condition[3], - maxAF=condition[4], - includeSVTYPE=condition[5], - excludeSVTYPE=condition[6], - includeFILTER=condition[7], - excludeFILTER=condition[8], - includeEV=condition[9], - excludeEV=condition[10], - maxSVperTrio=maxSVperTrio, - roc_max_fdr=roc_max_fdr, - roc_min_gq=roc_min_gq, - roc_max_gq=roc_max_gq, - roc_step_gq=roc_step_gq, - min_sv_per_proband_per_condition=min_sv_per_proband_per_condition - } - } - - # Merge across conditions - call CombineRocOptResults as combine { - input: - sv_base_mini_docker=sv_base_mini_docker, - condition_optimizations=roc_single.roc_optimal, - condition_distrib_stats=roc_single.distrib_stats, - prefix="~{prefix}" - } - - # Outputs - output { - File roc_optimal_merged = combine.combined_optimizations - File distrib_stats_merged = combine.combined_distrib_stats - } -} - - -# Subset variants to meet a given set of conditions, merge across trios, -# and run ROC if condition has enough variants per sample -task FilterMergeVariantsWithROC { - input{ - File trio_tarball - String prefix - File trios_list - String condition_id - String minSVLEN - String maxSVLEN - String minAF - String maxAF - String includeSVTYPE - String excludeSVTYPE - String includeFILTER - String excludeFILTER - String includeEV - String excludeEV - Int maxSVperTrio - Float roc_max_fdr - Int 
roc_min_gq - Int roc_max_gq - Int roc_step_gq - Int min_sv_per_proband_per_condition - String sv_pipeline_docker - RuntimeAttr? runtime_attr_override - } - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 8, - disk_gb: 50, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - - command <<< - set -euo pipefail - tar -xzvf ~{trio_tarball} - find -name "trio_variant_info.txt.gz" > trio_dat_list.txt - #Iterate over families and process them one at a time - while read famdat; do - #Subset to variants matching condition - /opt/sv-pipeline/scripts/downstream_analysis_and_filtering/subset_minGQ_trio_data.R \ - --min.size "~{minSVLEN}" \ - --max.size "~{maxSVLEN}" \ - --min.freq "~{minAF}" \ - --max.freq "~{maxAF}" \ - --svtype.include "~{includeSVTYPE}" \ - --svtype.exclude "~{excludeSVTYPE}" \ - --filter.include "~{includeFILTER}" \ - --filter.exclude "~{excludeFILTER}" \ - --ev.include "~{includeEV}" \ - --ev.exclude "~{excludeEV}" \ - --max.variants "~{maxSVperTrio}" \ - "$famdat" /dev/stdout - done < trio_dat_list.txt \ - | gzip -c \ - > "~{prefix}.~{condition_id}.trio_variants.txt.gz" - #Compute median # of filtered calls per trio - if [ $( zcat "~{prefix}.~{condition_id}.trio_variants.txt.gz" | wc -l ) -gt 0 ]; then - /opt/sv-pipeline/scripts/downstream_analysis_and_filtering/helper_median_counts_per_trio.R \ - --ID "~{condition_id}" \ - "~{prefix}.~{condition_id}.trio_variants.txt.gz" \ - "~{trios_list}" \ - "~{prefix}.~{condition_id}.perTrio_distrib_stats.txt" - med=$( fgrep -v "#" "~{prefix}.~{condition_id}.perTrio_distrib_stats.txt" | cut -f2 ) - else - echo -e "#condition\thetsPerProband_median\thetsPerProband_Q1\thetsPerProband_Q2\n~{condition_id}\t0\t0\t0" \ - > "~{prefix}.~{condition_id}.perTrio_distrib_stats.txt" - med=0 - fi - #Run ROC if enough variants per proband - echo -e "FINISHED FILTERING. FOUND $med MEDIAN QUALIFYING VARIANTS PER CHILD." - if [ "$med" -gt ~{min_sv_per_proband_per_condition} ]; then - echo -e "STARTING ROC OPTIMIZATION." - /opt/sv-pipeline/scripts/downstream_analysis_and_filtering/optimize_minGQ_ROC_v2.R \ - --prefix "~{condition_id}" \ - --fdr "~{roc_max_fdr}" \ - --minGQ "~{roc_min_gq}" \ - --maxGQ "~{roc_max_gq}" \ - --step "~{roc_step_gq}" \ - "~{prefix}.~{condition_id}.trio_variants.txt.gz" \ - "~{trios_list}" \ - "./" - gzip "~{condition_id}.minGQ_ROC.data.txt" - else - echo -e "TOO FEW VARIANTS FOR ROC OPTIMIZATION." 
- touch "~{condition_id}.minGQ_ROC.data.txt.gz" - touch "~{condition_id}.minGQ_ROC.optimal.txt" - touch "~{condition_id}.minGQ_ROC.plot.pdf" - fi - >>> - - output { - File distrib_stats = "~{prefix}.~{condition_id}.perTrio_distrib_stats.txt" - File roc_data = "~{condition_id}.minGQ_ROC.data.txt.gz" - File roc_optimal = "~{condition_id}.minGQ_ROC.optimal.txt" - File roc_plot = "~{condition_id}.minGQ_ROC.plot.pdf" - } - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - - -# Merge all ROC optimal cutoffs into single file for tree reconstruction -task CombineRocOptResults { - input{ - Array[File] condition_optimizations - Array[File] condition_distrib_stats - String prefix - String sv_base_mini_docker - RuntimeAttr? runtime_attr_override - } - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 8, - disk_gb: 50, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - - command <<< - set -euo pipefail - find . -name "*.minGQ_ROC.optimal.txt" \ - | xargs -I {} cat {} | fgrep -v "#" | sort -Vk1,1 \ - > "~{prefix}.minGQ_condition_opts.txt" ||true - find / -name "*.perTrio_distrib_stats.txt" \ - | xargs -I {} cat {} | fgrep -v "#" | sort -Vk1,1 \ - > "~{prefix}.minGQ_condition_distrib_stats.txt" ||true - >>> - - output { - File combined_optimizations = "~{prefix}.minGQ_condition_opts.txt" - File combined_distrib_stats = "~{prefix}.minGQ_condition_distrib_stats.txt" - } - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_base_mini_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} diff --git a/wdl/Module07FilterCleanupQualRecalibration.wdl b/wdl/Module07FilterCleanupQualRecalibration.wdl deleted file mode 100644 index 3192f25aa..000000000 --- a/wdl/Module07FilterCleanupQualRecalibration.wdl +++ /dev/null @@ -1,137 +0,0 @@ -########################## -## EXPERIMENTAL WORKFLOW -########################## - -version 1.0 - -import "Structs.wdl" -import "TasksMakeCohortVcf.wdl" as MiniTasks - - -workflow FilterCleanupQualRecalibration { - input{ - File vcf - File vcf_idx - File? pcrplus_samples_list - File famfile - Float min_callrate_global - Float min_callrate_smallDels - File contiglist - String prefix - String sv_pipeline_docker - RuntimeAttr? 
runtime_attr_ConcatVcfs - } - Array[Array[String]] contigs = read_tsv(contiglist) - - scatter ( contig in contigs ) { - call Cleanup { - input: - vcf=vcf, - vcf_idx=vcf_idx, - contig=contig[0], - pcrplus_samples_list=pcrplus_samples_list, - famfile=famfile, - min_callrate_global=min_callrate_global, - min_callrate_smallDels=min_callrate_smallDels, - prefix=prefix, - sv_pipeline_docker=sv_pipeline_docker - } - } - - call MiniTasks.ConcatVcfs as ConcatVcfs { - input: - vcfs=Cleanup.out_vcf, - naive=true, - outfile_prefix="~{prefix}.cleaned_filters_qual_recalibrated", - sv_base_mini_docker=sv_pipeline_docker, - runtime_attr_override=runtime_attr_ConcatVcfs - } - - output { - File cleaned_vcf = ConcatVcfs.concat_vcf - File cleaned_vcf_idx =ConcatVcfs.concat_vcf_idx - } -} - - -# Applies filters & cleanup to VCF for a single chromosome -task Cleanup { - input{ - File vcf - File vcf_idx - String contig - File? pcrplus_samples_list - File famfile - Float min_callrate_global - Float min_callrate_smallDels - String prefix - String sv_pipeline_docker - RuntimeAttr? runtime_attr_override - } - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 3.75, - disk_gb: 50, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 0 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - command <<< - - set -euo pipefail - #Subset to chromosome of interest - tabix -h ~{vcf} ~{contig} | bgzip -c > input.vcf.gz - #Get list of PCR- samples - tabix -H ~{vcf} | fgrep -v "##" | cut -f10- | sed 's/\t/\n/g' \ - > all.samples.list - if [ ! -z "~{pcrplus_samples_list}" ];then - fgrep -wvf ~{pcrplus_samples_list} all.samples.list \ - > pcrminus.samples.list - else - cp all.samples.list pcrminus.samples.list - fi - #Restrict famfiles - awk -F "\t" 'NR==FNR{c[$1]++;next};c[$2] > 0' all.samples.list ~{famfile} > revised.pre - awk 'NR==FNR{o[FNR]=$1; next} {t[$2]=$0} END{for(x=1; x<=FNR; x++){y=o[x]; print t[y]}}' all.samples.list revised.pre > revised.fam - fgrep -wf pcrminus.samples.list revised.fam > revised.pcrminus.fam - #Compute fraction of missing genotypes per variant - zcat input.vcf.gz \ - | awk '{ if ($7 !~ /MULTIALLELIC/) print $0 }' \ - | bgzip -c \ - > input.noMCNV.vcf.gz - plink2 \ - --missing variant-only \ - --max-alleles 2 \ - --keep-fam revised.pcrminus.fam \ - --fam revised.fam \ - --vcf input.noMCNV.vcf.gz - fgrep -v "#" plink2.vmiss \ - | awk -v OFS="\t" '{ print $2, 1-$NF }' \ - > callrates.txt - #Clean up VCF - /opt/sv-pipeline/scripts/downstream_analysis_and_filtering/filter_cleanup_and_QUAL_recalibration.PCRMinus_only.py \ - --callrate-table callrates.txt \ - --min-callrate-global ~{min_callrate_global} \ - --min-callrate-smallDels ~{min_callrate_smallDels} \ - input.vcf.gz \ - stdout \ - | bgzip -c \ - > "~{prefix}.~{contig}.cleaned_filters_qual_recalibrated.vcf.gz" - >>> - - output { - File out_vcf = "~{prefix}.~{contig}.cleaned_filters_qual_recalibrated.vcf.gz" - } - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - diff --git a/wdl/Module07MinGQ.wdl 
b/wdl/Module07MinGQ.wdl deleted file mode 100644 index 9bf564415..000000000 --- a/wdl/Module07MinGQ.wdl +++ /dev/null @@ -1,924 +0,0 @@ -version 1.0 - -import "MinGQRocOpt.wdl" as roc_opt_sub -import "CalcAF.wdl" as calcAF -import "Structs.wdl" -import "TasksMakeCohortVcf.wdl" as MiniTasks -import "ReviseSVtypeINStoMEI.wdl" as ReviseSVtype - -workflow Module07MinGQ { - input { - String sv_base_mini_docker - String sv_pipeline_docker - File vcf - File vcf_idx - String prefix - File contiglist - File trios_famfile - String optimize_minSizes - String optimize_maxSizes - String optimize_minFreqs - String optimize_maxFreqs - String optimize_includeSVTYPEs - String optimize_includeFILTERs - String optimize_excludeFILTERs - String optimize_includeEV - String optimize_excludeEV - Int optimize_maxSVperTrio - Float roc_max_fdr_PCRMINUS - Float roc_max_fdr_PCRPLUS - Int roc_min_gq - Int roc_max_gq - Int roc_step_gq - Int roc_shards - Int min_sv_per_proband_per_condition - Int max_shards_per_chrom_step1 - Int min_records_per_shard_step1 - Float max_noCallRate - Int global_minGQ - String ref_build - File? sanders_2015_tarball - File? collins_2017_tarball - File? werling_2018_tarball - File? pcrplus_samples_list - File? pcrminus_filter_lookup_table - Boolean MingqTraining=! defined(pcrminus_filter_lookup_table) - - # overrides for local tasks - RuntimeAttr? runtime_attr_CombineVcfs - RuntimeAttr? runtime_attr_GatherTrioData - RuntimeAttr? runtime_attr_ReviseSVtypeMEI - RuntimeAttr? runtime_override_split_vcf_to_clean - - RuntimeAttr? runtime_attr_compute_shard_af - RuntimeAttr? runtime_attr_scatter_vcf_calcaf - RuntimeAttr? runtime_attr_combine_sharded_vcfs_calcaf - } - - Array[Array[String]] contigs = read_tsv(contiglist) - - # Get svtype of MEI - call ReviseSVtype.ReviseSVtypeINStoMEI as ReviseSVtypeMEI { - input: - vcf = vcf, - vcf_idx = vcf_idx, - sv_base_mini_docker = sv_base_mini_docker, - sv_pipeline_docker = sv_pipeline_docker, - prefix = prefix, - contiglist = contiglist, - max_shards_per_chrom_step1 = max_shards_per_chrom_step1, - min_records_per_shard_step1 = min_records_per_shard_step1, - runtime_attr_ReviseSVtypeMEI = runtime_attr_ReviseSVtypeMEI, - runtime_override_split_vcf_to_clean=runtime_override_split_vcf_to_clean - } - - # Get list of PCRMINUS samples - call GetSampleLists { - input: - vcf = ReviseSVtypeMEI.updated_vcf, - vcf_idx = ReviseSVtypeMEI.updated_vcf_idx, - pcrplus_samples_list = pcrplus_samples_list, - prefix = prefix, - sv_base_mini_docker = sv_base_mini_docker - } - - # Shard VCF per-chromosome and add AF annotation - scatter ( contig in contigs ) { - #Split VCF into PCR+ and PCR- - call calcAF.CalcAF as getAFs { - input: - vcf=ReviseSVtypeMEI.updated_vcf, - vcf_idx=ReviseSVtypeMEI.updated_vcf_idx, - sv_per_shard=1000, - prefix="~{prefix}.~{contig[0]}", - sv_pipeline_docker=sv_pipeline_docker, - runtime_attr_scatter_vcf = runtime_attr_scatter_vcf_calcaf, - runtime_attr_compute_shard_af = runtime_attr_compute_shard_af, - runtime_attr_combine_sharded_vcfs = runtime_attr_combine_sharded_vcfs_calcaf - } - if (defined(pcrplus_samples_list)) { - call SplitPcrVcf { - input: - vcf=getAFs.vcf_wAFs, - prefix="~{prefix}.~{contig[0]}", - pcrplus_samples_list=select_first([pcrplus_samples_list]), - sv_base_mini_docker=sv_base_mini_docker - } - } - File pcr_minus_vcf = select_first([SplitPcrVcf.PCRMINUS_vcf, getAFs.vcf_wAFs]) - - # Dev note Feb 18 2021: the output from cat_AF_table_PCRMINUS is a required - # input to Module07XfBatchEffect.wdl, so the subsequent three tasks always - # 
need to be generated (even if passing a precomputed minGQ cutoff table) - - # Annotate PCR-specific AFs - call calcAF.CalcAF as getAFs_byPCR { - input: - vcf=ReviseSVtypeMEI.updated_vcf, - vcf_idx=ReviseSVtypeMEI.updated_vcf_idx, - sv_per_shard=1000, - prefix="~{prefix}.~{contig[0]}", - sample_pop_assignments=GetSampleLists.sample_PCR_labels, - sv_pipeline_docker=sv_pipeline_docker, - runtime_attr_scatter_vcf = runtime_attr_scatter_vcf_calcaf, - runtime_attr_compute_shard_af = runtime_attr_compute_shard_af, - runtime_attr_combine_sharded_vcfs = runtime_attr_combine_sharded_vcfs_calcaf - } - # Gather table of AC/AN/AF for PCRPLUS and PCRMINUS samples - call GetAfTables { - input: - vcf=getAFs_byPCR.vcf_wAFs, - pcrplus_samples_list=pcrplus_samples_list, - vcf_idx=getAFs_byPCR.vcf_wAFs_idx, - prefix="~{prefix}.~{contig[0]}", - sv_pipeline_docker=sv_pipeline_docker - } - } - call CombineRocOptResults as cat_AF_table_PCRMINUS { - input: - shards=GetAfTables.PCRMINUS_AF_table, - outfile="~{prefix}.PCRMINUS.AF_preMinGQ.txt", - sv_base_mini_docker=sv_base_mini_docker, - } - - - if (MingqTraining) { - ###PCRMINUS - call SplitFamfile as SplitFamfile_PCRMINUS { - input: - vcf=pcr_minus_vcf[0], - vcf_idx=pcr_minus_vcf[0] + ".tbi", - famfile=trios_famfile, - fams_per_shard=1, - prefix="~{prefix}.PCRMINUS", - sv_base_mini_docker=sv_base_mini_docker - } - scatter ( fam in SplitFamfile_PCRMINUS.famfile_shards ) { - call CollectTrioSVdat as CollectTrioSVdat_PCRMINUS { - input: - vcf_shards=pcr_minus_vcf, - famfile=fam, - sv_pipeline_docker=sv_pipeline_docker - } - } - call GatherTrioData as GatherTrioData_PCRMINUS { - input: - files=CollectTrioSVdat_PCRMINUS.trio_SVdata, - prefix="~{prefix}.PCRMINUS", - sv_base_mini_docker=sv_base_mini_docker, - runtime_attr_override=runtime_attr_GatherTrioData - } - - - # Get table of all conditions to evaluate - call EnumerateConditions { - input: - prefix=prefix, - condition_shards=roc_shards, - optimize_minSizes=optimize_minSizes, - optimize_maxSizes=optimize_maxSizes, - optimize_minFreqs=optimize_minFreqs, - optimize_maxFreqs=optimize_maxFreqs, - optimize_includeSVTYPEs=optimize_includeSVTYPEs, - optimize_includeFILTERs=optimize_includeFILTERs, - optimize_excludeFILTERs=optimize_excludeFILTERs, - optimize_includeEV=optimize_includeEV, - optimize_excludeEV=optimize_excludeEV, - sv_pipeline_docker=sv_pipeline_docker - } - - - # Scatter over each shard of conditions and send the trio data for ROC optimization - scatter ( shard in EnumerateConditions.minGQ_conditions_table_noHeader_shards ) { - ### PCRMINUS - call roc_opt_sub.MinGQRocOpt as roc_opt_PCRMINUS { - input: - trio_tarball=GatherTrioData_PCRMINUS.tarball, - prefix="~{prefix}.PCRMINUS", - trios_list=SplitFamfile_PCRMINUS.cleaned_trios_famfile, - conditions_table=shard, - maxSVperTrio=optimize_maxSVperTrio, - roc_max_fdr=roc_max_fdr_PCRMINUS, - roc_min_gq=roc_min_gq, - roc_max_gq=roc_max_gq, - roc_step_gq=roc_step_gq, - min_sv_per_proband_per_condition=min_sv_per_proband_per_condition, - sv_base_mini_docker=sv_base_mini_docker, - sv_pipeline_docker=sv_pipeline_docker - } - } - - - # Merge ROC results to build minGQ filtering lookup tree - ###PCRMINUS - call CombineRocOptResults as combine_roc_optimal_PCRMINUS { - input: - shards=roc_opt_PCRMINUS.roc_optimal_merged, - outfile="~{prefix}.PCRMINUS.minGQ_condition_opts.txt", - sv_base_mini_docker=sv_base_mini_docker - } - call CombineRocOptResults as combine_roc_stats_PCRMINUS { - input: - shards=roc_opt_PCRMINUS.distrib_stats_merged, - 
outfile="~{prefix}.minGQ_condition_distrib_stats.txt", - sv_base_mini_docker=sv_base_mini_docker - } - - - # Create final minGQ filtering tree - ###PCRMINUS - call BuildFilterTree as build_tree_PCRMINUS { - input: - conditions_table=EnumerateConditions.minGQ_conditions_table, - condition_optimizations=combine_roc_optimal_PCRMINUS.merged_file, - condition_distrib_stats=combine_roc_stats_PCRMINUS.merged_file, - prefix="~{prefix}.PCRMINUS", - sv_pipeline_docker=sv_pipeline_docker - } - } - - # Apply filter per chromosome - ###PCRMINUS - scatter ( vcf_shard in pcr_minus_vcf ) { - call ApplyMinGQFilter as apply_filter_PCRMINUS { - input: - vcf=vcf_shard, - minGQ_lookup_table=select_first([pcrminus_filter_lookup_table,build_tree_PCRMINUS.filter_lookup_table]), - prefix="~{prefix}.PCRMINUS", - PCR_status="PCRMINUS", - maxNCR=max_noCallRate, - global_minGQ=global_minGQ, - sv_pipeline_docker=sv_pipeline_docker - } - } - - - call MiniTasks.ConcatVcfs as CombineVcfs { - input: - vcfs=apply_filter_PCRMINUS.filtered_vcf, - naive=true, - outfile_prefix=prefix, - sv_base_mini_docker=sv_base_mini_docker, - runtime_attr_override=runtime_attr_CombineVcfs - } - - - # Final output - output { - File filtered_vcf = CombineVcfs.concat_vcf - File filtered_vcf_idx = CombineVcfs.concat_vcf_idx - File? AF_table_preMinGQ_PCRMINUS = cat_AF_table_PCRMINUS.merged_file - File? filter_lookup_table = build_tree_PCRMINUS.filter_lookup_table - } -} - -# Get lists of PCRPLUS and PCRMINUS samples present in input VCF -task GetSampleLists { - input{ - String sv_base_mini_docker - File vcf - File vcf_idx - File? pcrplus_samples_list - String prefix - RuntimeAttr? runtime_attr_override - } - - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 3.75, - disk_gb: ceil(10 + size(vcf, "GB")), - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - command <<< - set -euo pipefail - bcftools query -l ~{vcf} > all_samples.list - if ~{defined(pcrplus_samples_list)}; then - awk -v OFS="\t" 'ARGIND==1{inFileA[$1]; next} {if($1 in inFileA){print $1,"PCRPLUS"}else{print $1,"PCRMINUS"}}' ~{pcrplus_samples_list} all_samples.list \ - > ~{prefix}.PCR_status_assignments.txt - else - awk -v OFS="\t" '{ print $1, "PCRMINUS" }' all_samples.list \ - > ~{prefix}.PCR_status_assignments.txt - fi - >>> - - output { - File sample_PCR_labels = "~{prefix}.PCR_status_assignments.txt" - } - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_base_mini_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - - -# Split a VCF into two parts, corresponding to PCR+ and PCR- -task SplitPcrVcf { - input{ - File vcf - String prefix - File pcrplus_samples_list - String sv_base_mini_docker - RuntimeAttr? 
runtime_attr_override - } - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 3.75, - disk_gb: ceil(10 + size(vcf, "GB") * 2), - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - - command <<< - bcftools query -l ~{vcf} > all_samples.list - awk 'ARGIND==1{inFileA[$1]; next} !($1 in inFileA)' ~{pcrplus_samples_list} all_samples.list \ - > pcrminus_samples.list - bcftools reheader -s pcrminus_samples.list -Oz -o ~{prefix}.PCRMINUS.vcf.gz - tabix ~{prefix}.PCRMINUS.vcf.gz - >>> - - output { - File PCRMINUS_vcf = "~{prefix}.PCRMINUS.vcf.gz" - File PCRMINUS_vcf_idx = "~{prefix}.PCRMINUS.vcf.gz.tbi" - } - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_base_mini_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - - -# Get a simple table with ID/AC/AN/AF per variant, prior to minGQ -task GetAfTables { - input{ - File vcf - File vcf_idx - String prefix - String sv_pipeline_docker - File? pcrplus_samples_list - RuntimeAttr? runtime_attr_override - } - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 3.75, - disk_gb: ceil(10 + size(vcf, "GB") * 3), - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - - command <<< - set -euo pipefail - #Run vcf2bed - svtk vcf2bed --info ALL --no-samples ~{vcf} "~{prefix}.vcf2bed.bed" - #Cut to necessary columns - idxs=$( sed -n '1p' "~{prefix}.vcf2bed.bed" \ - | sed 's/\t/\n/g' \ - | awk -v OFS="\t" '{ print $1, NR }' \ - | grep -e 'name\|SVLEN\|SVTYPE\|_AC\|_AN\|_CN_NONREF_COUNT\|_CN_NUMBER' \ - | fgrep -v "OTH" \ - | cut -f2 \ - | paste -s -d\, || true ) - cut -f"$idxs" "~{prefix}.vcf2bed.bed" \ - | sed 's/^name/\#VID/g' \ - | gzip -c \ - > "~{prefix}.frequencies.preclean.txt.gz" - if [ ! -z "~{pcrplus_samples_list}" ]; then - echo -e "dummy\tPCRMINUS\ndummy2\tPCRPLUS" > dummy.tsv - else - echo -e "dummy\tPCRMINUS" > dummy.tsv - fi - #Clean frequencies - /opt/sv-pipeline/scripts/downstream_analysis_and_filtering/clean_frequencies_table.R \ - "~{prefix}.frequencies.preclean.txt.gz" \ - dummy.tsv \ - "~{prefix}.frequencies.txt" - for PCR in $( cut -f2 dummy.tsv | sort | uniq ); do - AC_idx=$( zcat "~{prefix}.frequencies.txt.gz" | sed -n '1p' | sed 's/\t/\n/g' | awk -v PCR="$PCR" '{ if ($1==PCR"_AC") print NR }' ) - AN_idx=$( zcat "~{prefix}.frequencies.txt.gz" | sed -n '1p' | sed 's/\t/\n/g' | awk -v PCR="$PCR" '{ if ($1==PCR"_AN") print NR }' ) - zcat "~{prefix}.frequencies.txt.gz" \ - | sed '1d' \ - | awk -v FS="\t" -v OFS="\t" -v AC="$AC_idx" -v AN="$AN_idx" \ - '{ print $1, $(AC), $(AN) }' \ - > ~{prefix}."$PCR".AF_preMinGQ.txt - done - if [ ! 
-z ~{prefix}.PCRPLUS.AF_preMinGQ.txt ]; then - touch ~{prefix}.PCRPLUS.AF_preMinGQ.txt - fi - >>> - - output { - File PCRPLUS_AF_table = "~{prefix}.PCRPLUS.AF_preMinGQ.txt" - File PCRMINUS_AF_table = "~{prefix}.PCRMINUS.AF_preMinGQ.txt" - } - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - -# Shard a trio famfile to keep only trios that are all represented in the vcf header -task SplitFamfile { - input{ - File vcf - File vcf_idx - File famfile - String prefix - String sv_base_mini_docker - Int fams_per_shard - RuntimeAttr? runtime_attr_override - } - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 3.75, - disk_gb: 30, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - - command <<< - set -euo pipefail - #Get list of sample IDs & column numbers from VCF header - tabix -H ~{vcf} | fgrep -v "##" | sed 's/\t/\n/g' \ - | awk -v OFS="\t" '{ print $1, NR }' > vcf_header_columns.txt - #Iterate over families & subset VCF - while read famID pro fa mo prosex pheno; do - pro_idx=$( awk -v ID=$pro '{ if ($1==ID) print $2 }' vcf_header_columns.txt ) - fa_idx=$( awk -v ID=$fa '{ if ($1==ID) print $2 }' vcf_header_columns.txt ) - mo_idx=$( awk -v ID=$mo '{ if ($1==ID) print $2 }' vcf_header_columns.txt ) - if ! [ -z $pro_idx ] && ! [ -z $fa_idx ] && ! [ -z $mo_idx ]; then - fgrep -w "$famID" ~{famfile} || true - fi - done < ~{famfile} \ - | awk -v FS="\t" -v OFS="\t" '{ if ($2!="0" && $3!="0" && $4!="0") print $0 }' \ - > "~{prefix}.cleaned_trios.fam" # proband-only - split -l ~{fams_per_shard} --numeric-suffixes=00001 -a 5 ~{prefix}.cleaned_trios.fam famfile_shard_ - >>> - - output { - File cleaned_trios_famfile = "~{prefix}.cleaned_trios.fam" - Array[File] famfile_shards = glob("famfile_shard_*") - } - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_base_mini_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - -# Collect a single table of all relevant variants for a single family -task CollectTrioSVdat { - input{ - Array[File] vcf_shards - File famfile - String sv_pipeline_docker - RuntimeAttr? 
runtime_attr_override - } - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 3.75, - disk_gb: 50, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - - - command <<< - for wrapper in 1; do - #Write header - echo -e "#famID\tVID\tSVLEN\tAF\tSVTYPE\tFILTER\tpro_EV\tpro_AC\tfa_AC\tmo_AC\tpro_GQ\tfa_GQ\tmo_GQ" - #Iterate over list of VCF shards - while read vcf; do - #Get list of sample IDs & column numbers from VCF header - zfgrep "#" $vcf | fgrep -v "##" | head -n1000 |sed 's/\t/\n/g' \ - | awk -v OFS="\t" '{ print $1, NR }' > vcf_header_columns.txt - #Iterate over families & subset VCF - while read famID pro fa mo prosex pheno; do - pro_idx=$( awk -v ID=$pro '{ if ($1==ID) print $2 }' vcf_header_columns.txt ) - fa_idx=$( awk -v ID=$fa '{ if ($1==ID) print $2 }' vcf_header_columns.txt ) - mo_idx=$( awk -v ID=$mo '{ if ($1==ID) print $2 }' vcf_header_columns.txt ) - if ! [ -z $pro_idx ] && ! [ -z $fa_idx ] && ! [ -z $mo_idx ]; then - #Subset vcf to only multiallelic sites in teh family - zcat "$vcf" | cut -f1-9,"$pro_idx","$fa_idx","$mo_idx" \ - | grep -e '\#\|[0-1]\/1\|MULTIALLELIC' \ - | bgzip -c > $famID.vcf.gz - #Get list of CNVs in proband that are ≥5kb have ≥50% coverage in either parent - svtk vcf2bed -i SVTYPE --no-header $famID.vcf.gz stdout \ - | awk -v OFS="\t" '{ if ($NF ~ /DEL|DUP|CNV/) print $1, $2, $3, $4, $NF, $6 }' \ - > $famID.CNVs.bed - fgrep -w $pro $famID.CNVs.bed \ - | awk -v OFS="\t" '{ if ($3-$2>=5000 && $5!="CNV") print $1, $2, $3, $4, $5 }' \ - > $pro.CNVs.gt5kb.bed - fgrep -w $fa $famID.CNVs.bed > $fa.CNVs.bed - fgrep -w $mo $famID.CNVs.bed > $mo.CNVs.bed - #Deletions - awk -v OFS="\t" '{ if ($NF=="DEL") print $0, "1" }' $pro.CNVs.gt5kb.bed \ - | bedtools coverage -a - \ - -b <( awk '{ if ($5 ~ /DEL|CNV/) print $0 }' $fa.CNVs.bed ) \ - | awk -v OFS="\t" '{ if ($NF>=0.5) $NF=1; else $NF=0; print $1, $2, $3, $4, $5, $6, $NF }' \ - | bedtools coverage -a - \ - -b <( awk '{ if ($5 ~ /DEL|CNV/) print $0 }' $mo.CNVs.bed ) \ - | awk -v OFS="\t" '{ if ($NF>=0.5) $NF=1; else $NF=0; print $4, $6, $7, $NF }' \ - > $famID.RD_genotype_update.txt - #Duplications - awk -v OFS="\t" '{ if ($NF=="DUP") print $0, "1" }' $pro.CNVs.gt5kb.bed \ - | bedtools coverage -a - \ - -b <( awk '{ if ($5 ~ /DUP|CNV/) print $0 }' $fa.CNVs.bed ) \ - | awk -v OFS="\t" '{ if ($NF>=0.5) $NF=1; else $NF=0; print $1, $2, $3, $4, $5, $6, $NF }' \ - | bedtools coverage -a - \ - -b <( awk '{ if ($5 ~ /DUP|CNV/) print $0 }' $mo.CNVs.bed ) \ - | awk -v OFS="\t" '{ if ($NF>=0.5) $NF=1; else $NF=0; print $4, $6, $7, $NF }' \ - >> $famID.RD_genotype_update.txt - #Get variant stats - /opt/sv-pipeline/scripts/downstream_analysis_and_filtering/gather_trio_genos.py \ - --ac-adj $famID.RD_genotype_update.txt \ - --no-header \ - $famID.vcf.gz stdout "$pro" "$fa" "$mo" \ - | awk -v famID="$famID" -v OFS="\t" '{ print famID, $0 }' - fi - done < ~{famfile} - done < ~{write_lines(vcf_shards)} - done | bgzip -c > "trio_variant_info.txt.gz" - >>> - - output { - File trio_SVdata = "trio_variant_info.txt.gz" - } - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: 
select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - - -# Gather all trio SV data into a single tarball (helps with Cromwell file localization) -task GatherTrioData { - input{ - Array[File] files - String prefix - String sv_base_mini_docker - RuntimeAttr? runtime_attr_override - } - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 3.75, - disk_gb: 10, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - - command <<< - tar -czvf ~{prefix}.tar.gz -T ~{write_lines(files)} - >>> - - output { - File tarball = "~{prefix}.tar.gz" - } - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_base_mini_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - - -# Enumerate all minGQ conditions to test -task EnumerateConditions { - input{ - String prefix - Int condition_shards - String optimize_minSizes - String optimize_maxSizes - String optimize_minFreqs - String optimize_maxFreqs - String optimize_includeSVTYPEs - String optimize_includeFILTERs - String optimize_excludeFILTERs - String optimize_includeEV - String optimize_excludeEV - String sv_pipeline_docker - RuntimeAttr? runtime_attr_override - } - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 3.75, - disk_gb: 10, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - - command <<< - set -euo pipefail - /opt/sv-pipeline/scripts/downstream_analysis_and_filtering/create_minGQ_tranches_table.R \ - --min.sizes "~{optimize_minSizes}" \ - --max.sizes "~{optimize_maxSizes}" \ - --min.freqs "~{optimize_minFreqs}" \ - --max.freqs "~{optimize_maxFreqs}" \ - --svtype.include "~{optimize_includeSVTYPEs}" \ - --filter.include "~{optimize_includeFILTERs}" \ - --filter.exclude "~{optimize_excludeFILTERs}" \ - --ev.include "~{optimize_includeEV}" \ - --ev.exclude "~{optimize_excludeEV}" \ - "~{prefix}.minGQ_conditions.txt" - fgrep -v "#" "~{prefix}.minGQ_conditions.txt" \ - > "~{prefix}.minGQ_conditions.noHeader.txt" - /opt/sv-pipeline/04_variant_resolution/scripts/evenSplitter.R \ - -S ~{condition_shards} \ - "~{prefix}.minGQ_conditions.noHeader.txt" \ - "~{prefix}.minGQ_conditions.noHeader.shard" - >>> - - output { - File minGQ_conditions_table = "~{prefix}.minGQ_conditions.txt" - File minGQ_conditions_table_noHeader = "~{prefix}.minGQ_conditions.noHeader.txt" - Array[File] minGQ_conditions_table_noHeader_shards = glob("~{prefix}.minGQ_conditions.noHeader.shard*") - } - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, 
default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - - -# Merge ROC optimal cutoffs or stats -task CombineRocOptResults { - input{ - Array[File] shards - String outfile - String sv_base_mini_docker - RuntimeAttr? runtime_attr_override - } - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 4, - disk_gb: 50, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - - command <<< - cat ~{write_lines(shards)} | xargs -I {} fgrep -v "#" {} | sort -Vk1,1 > ~{outfile} - >>> - - output { - File merged_file = "~{outfile}" - } - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_base_mini_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - - -# Build final minGQ filtering tree -task BuildFilterTree { - input{ - File conditions_table - File condition_optimizations - File condition_distrib_stats - String prefix - String sv_pipeline_docker - RuntimeAttr? runtime_attr_override - } - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 4, - disk_gb: 10, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - - command <<< - set -euo pipefail - /opt/sv-pipeline/scripts/downstream_analysis_and_filtering/create_minGQ_lookup_table.R \ - "~{conditions_table}" \ - "~{condition_distrib_stats}" \ - "~{condition_optimizations}" \ - "~{prefix}.minGQ.ordered_tree_hierarchy.txt" \ - "~{prefix}.minGQ.filter_lookup_table.txt" - >>> - - - output { - File ordered_tree_hierarchy = "~{prefix}.minGQ.ordered_tree_hierarchy.txt" - File filter_lookup_table = "~{prefix}.minGQ.filter_lookup_table.txt" - } - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - - -# Apply minGQ filter to VCF -task ApplyMinGQFilter { - input{ - File vcf - File minGQ_lookup_table - String prefix - String PCR_status - Float maxNCR - Int global_minGQ - String sv_pipeline_docker - RuntimeAttr? 
runtime_attr_override - } - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 4, - disk_gb: 10, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - - command <<< - set -euo pipefail - /opt/sv-pipeline/scripts/downstream_analysis_and_filtering/apply_minGQ_filter.py \ - --minGQ "~{global_minGQ}" \ - --maxNCR "~{maxNCR}" \ - --simplify-INS-SVTYPEs \ - --cleanAFinfo \ - --prefix "~{PCR_status}" \ - "~{vcf}" \ - "~{minGQ_lookup_table}" \ - stdout \ - | fgrep -v "##INFO= "~{prefix}.minGQ_filtered.vcf.gz" - >>> - - output { - File filtered_vcf = "~{prefix}.minGQ_filtered.vcf.gz" - } - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - - -# Merge PCRPLUS and PCRMINUS VCFs for a single chromosome -task MergePcrVCFs { - input{ - File? PCRPLUS_vcf - File PCRMINUS_vcf - String prefix - String sv_pipeline_docker - RuntimeAttr? runtime_attr_override - } - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 4, - disk_gb: 20, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - - command <<< - if [ ! -z "~{PCRPLUS_vcf}" ];then - #Sanitize FILTER columns - zcat "~{PCRPLUS_vcf}" | cut -f7 | grep -ve '^#' | sed '1d' > PCRPLUS_filters.txt - zcat "~{PCRMINUS_vcf}" | cut -f7 | grep -ve '^#' | sed '1d' > PCRMINUS_filters.txt - /opt/sv-pipeline/scripts/downstream_analysis_and_filtering/merge_filter_columns.py \ - PCRPLUS_filters.txt \ - PCRMINUS_filters.txt \ - merged_filters.txt - #Write new VCF header - zgrep -e '^##' ~{PCRPLUS_vcf} > "~{prefix}.minGQ_filtered.vcf" - zgrep -e '^##' ~{PCRMINUS_vcf} | fgrep "NOCALL_RATE" >> "~{prefix}.minGQ_filtered.vcf" - #Column-wise merger - paste \ - <( zgrep -ve '^##' "~{PCRPLUS_vcf}" | cut -f1-6 ) \ - <( cat <( echo -e "FILTER" ) merged_filters.txt ) \ - <( zgrep -ve '^##' "~{PCRPLUS_vcf}" | cut -f8- ) \ - <( zgrep -ve '^##' "~{PCRMINUS_vcf}" | cut -f10- ) \ - >> "~{prefix}.minGQ_filtered.vcf" - /opt/sv-pipeline/scripts/drop_empty_records.py \ - "~{prefix}.minGQ_filtered.vcf" \ - "~{prefix}.minGQ_filtered.no_blanks.vcf" - #Bgzip & tabix - bgzip -f "~{prefix}.minGQ_filtered.no_blanks.vcf" - else - #Sanitize FILTER columns - zcat "~{PCRMINUS_vcf}" | cut -f7 | grep -ve '^#' | sed '1d' > PCRMINUS_filters.txt - #Write new VCF header - zcat "~{PCRMINUS_vcf}" | sed -n '1,1000p' | grep -e '^##' > "~{prefix}.minGQ_filtered.vcf" - #Column-wise merger - paste \ - <( zcat "~{PCRMINUS_vcf}" | grep -ve '^##' | cut -f1-6 ) \ - <( cat <( echo -e "FILTER" ) PCRMINUS_filters.txt ) \ - <( zcat "~{PCRMINUS_vcf}" | grep -ve '^##' | cut -f8- ) \ - >> "~{prefix}.minGQ_filtered.vcf" - /opt/sv-pipeline/scripts/drop_empty_records.py \ - "~{prefix}.minGQ_filtered.vcf" \ - "~{prefix}.minGQ_filtered.no_blanks.vcf" - #Bgzip & tabix - bgzip -f "~{prefix}.minGQ_filtered.no_blanks.vcf" - fi - >>> - - output { - File merged_vcf = "~{prefix}.minGQ_filtered.no_blanks.vcf.gz" - } - - runtime { - cpu: 
select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} diff --git a/wdl/Module07MinGQStep2MergePCRStatus.wdl b/wdl/Module07MinGQStep2MergePCRStatus.wdl deleted file mode 100644 index f79b9bc87..000000000 --- a/wdl/Module07MinGQStep2MergePCRStatus.wdl +++ /dev/null @@ -1,303 +0,0 @@ -# Author: Ryan Collins - -# This is an analysis WDL that wraps three steps in the Talkowski SV pipeline: -# 1) minGQ optimization -# 2) minGQ filter application -# 3) post-filter VCF QC - -# This is the second build of this workflow, which enumerates many more fine-grained -# minGQ filtering conditions, and may not be optimized for small cohorts with fewer -# variants - -version 1.0 - -import "MainVcfQc.wdl" as QC - - -workflow MinGQStep2MergePcrStatus { - input{ - String prefix - File vcf_pcrplus - File vcf_pcrplus_idx - File vcf_pcrminus - File vcf_pcrminus_idx - Array[File]? thousand_genomes_benchmark_calls - Array[File]? hgsv_benchmark_calls - Array[File]? asc_benchmark_calls - File? sanders_2015_tarball - File? collins_2017_tarball - File? werling_2018_tarball - Int? random_seed - - File trios_famfile - File contiglist - - String sv_pipeline_qc_docker - String sv_base_mini_docker - String sv_pipeline_docker - RuntimeAttr? runtime_override_collect_vids_per_sample - } - - Array[String] contigs = transpose(read_tsv(contiglist))[0] - - - scatter (i in range(length(contigs))) { - - #split PCR Minus vcf into each contig - call Split_Vcf_by_contig as split_pcr_plus{ - input: - vcf = vcf_pcrplus, - vcf_idx = vcf_pcrplus_idx, - contig = contigs[i], - sv_pipeline_docker = sv_pipeline_docker - } - - call Split_Vcf_by_contig as split_pcr_minux{ - input: - vcf = vcf_pcrminus, - vcf_idx = vcf_pcrminus_idx, - contig = contigs[i], - sv_pipeline_docker = sv_pipeline_docker - } - - - # Merge filtered VCFs by PCR status & across chromosomes - call merge_PCR_VCFs { - input: - PCRPLUS_vcf=split_pcr_plus.vcf_out, - PCRMINUS_vcf=split_pcr_minux.vcf_out, - prefix=prefix - } - } - - call combine_vcfs { - input: - vcfs=merge_PCR_VCFs.merged_vcf, - prefix=prefix - } - - - # Run QC on filtered VCF - call QC.MainVcfQc as filtered_VCF_QC { - input: - ped_file=trios_famfile, - prefix="${prefix}", - sv_per_shard=10000, - samples_per_shard=100, - random_seed=random_seed, - sv_pipeline_qc_docker=sv_pipeline_qc_docker, - sv_base_mini_docker=sv_base_mini_docker, - sv_pipeline_docker=sv_pipeline_docker, - runtime_override_collect_vids_per_sample=runtime_override_collect_vids_per_sample - } - - # Final output - output { - File filtered_VCF = combine_vcfs.vcf - File filtered_VCF_idx = combine_vcfs.vcf_idx - File filtered_VCF_QC_output = filtered_VCF_QC.sv_vcf_qc_output - } -} - - -# Get lists of PCRPLUS and PCRMINUS samples present in input VCF -task get_sample_lists { - input{ - File vcf - File vcf_idx - File PCRPLUS_samples_list - String prefix - } - - command <<< - set -euo pipefail - tabix -H ~{vcf} | fgrep -v "##" | cut -f10- | sed 's/\t/\n/g' > all_samples.list - fgrep -wf ~{PCRPLUS_samples_list} all_samples.list > "~{prefix}.PCRPLUS.samples.list" || true - fgrep -wvf 
~{PCRPLUS_samples_list} all_samples.list > "~{prefix}.PCRMINUS.samples.list" || true - cat \ - <( awk -v OFS="\t" '{ print $1, "PCRPLUS" }' "~{prefix}.PCRPLUS.samples.list" || true ) \ - <( awk -v OFS="\t" '{ print $1, "PCRMINUS" }' "~{prefix}.PCRMINUS.samples.list" || true ) \ - > "~{prefix}.PCR_status_assignments.txt" - >>> - - output { - File updated_PCRPLUS_samples_list = "~{prefix}.PCRPLUS.samples.list" - File updated_PCRMINUS_samples_list = "~{prefix}.PCRMINUS.samples.list" - File sample_PCR_labels = "~{prefix}.PCR_status_assignments.txt" - } - - runtime { - docker: "talkowski/sv-pipeline@sha256:193d18c26100fdd603c569346722513f5796685e990ec3abcaeb4be887062a1a" - disks: "local-disk 50 HDD" - preemptible: 1 - maxRetries: 1 - } -} - - -# Split a VCF into two parts, corresponding to PCR+ and PCR- -task split_PCR_vcf { - input{ - File vcf - String prefix - File PCRPLUS_samples_list - } - - command <<< - set -euo pipefail - #Get index of PCR+ samples - PCRPLUS_idxs=$( zcat ~{vcf} | sed -n '1,500p' | fgrep "#" | fgrep -v "##" \ - | sed 's/\t/\n/g' | awk -v OFS="\t" '{ print NR, $1 }' \ - | fgrep -wf ~{PCRPLUS_samples_list} | cut -f1 | paste -s -d, ) - #Get PCR+ VCF - zcat ~{vcf} \ - | cut -f1-9,"$PCRPLUS_idxs" \ - | bgzip -c \ - > "~{prefix}.PCRPLUS.vcf.gz" - tabix -f "~{prefix}.PCRPLUS.vcf.gz" - #Get PCR- VCF - zcat ~{vcf} \ - | cut --complement -f"$PCRPLUS_idxs" \ - | bgzip -c \ - > "~{prefix}.PCRMINUS.vcf.gz" - tabix -f "~{prefix}.PCRMINUS.vcf.gz" - >>> - - output { - File PCRPLUS_vcf = "~{prefix}.PCRPLUS.vcf.gz" - File PCRPLUS_vcf_idx = "~{prefix}.PCRPLUS.vcf.gz.tbi" - File PCRMINUS_vcf = "~{prefix}.PCRMINUS.vcf.gz" - File PCRMINUS_vcf_idx = "~{prefix}.PCRMINUS.vcf.gz.tbi" - } - - runtime { - docker: "talkowski/sv-pipeline@sha256:193d18c26100fdd603c569346722513f5796685e990ec3abcaeb4be887062a1a" - disks: "local-disk 50 HDD" - preemptible: 1 - maxRetries: 1 - } -} - - -task Split_Vcf_by_contig { - input{ - File vcf - File vcf_idx - String contig - String sv_pipeline_docker - RuntimeAttr? 
runtime_attr_override - } - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 4, - disk_gb: 20, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - - command <<< - set -euo pipefail - #Tabix chromosome of interest - tabix -h ~{vcf} ~{contig} | bgzip -c > ~{contig}.vcf.gz - tabix -p vcf ~{contig}.vcf.gz - >>> - - output { - File vcf_out = "~{contig}.vcf.gz" - File vcf_out_index = "~{contig}.vcf.gz.tbi" - } - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - -# Merge PCRPLUS and PCRMINUS VCFs for a single chromosome -task merge_PCR_VCFs { - input{ - File PCRPLUS_vcf - File PCRMINUS_vcf - String prefix - } - - command <<< - #Sanitize FILTER columns - zcat ~{PCRPLUS_vcf} | cut -f7 | grep -ve '^#' | sed '1d' > PCRPLUS_filters.txt - zcat ~{PCRMINUS_vcf} | cut -f7 | grep -ve '^#' | sed '1d' > PCRMINUS_filters.txt - /opt/sv-pipeline/scripts/downstream_analysis_and_filtering/merge_filter_columns.py \ - PCRPLUS_filters.txt \ - PCRMINUS_filters.txt \ - merged_filters.txt - #Write new VCF header - zcat ~{PCRPLUS_vcf} | sed -n '1,1000p' | grep -e '^##' > "~{prefix}.minGQ_filtered.vcf" - zcat ~{PCRMINUS_vcf} | sed -n '1,1000p' | grep -e '^##' | fgrep "NOCALL_RATE" >> "~{prefix}.minGQ_filtered.vcf" - #Column-wise merger - paste \ - <( zcat ~{PCRPLUS_vcf} | grep -ve '^##' | cut -f1-6 ) \ - <( cat <( echo -e "FILTER" ) merged_filters.txt ) \ - <( zcat ~{PCRPLUS_vcf} | grep -ve '^##' | cut -f8- ) \ - <( zcat ~{PCRMINUS_vcf} | grep -ve '^##' | cut -f10- ) \ - >> "~{prefix}.minGQ_filtered.vcf" - /opt/sv-pipeline/scripts/drop_empty_records.py \ - "~{prefix}.minGQ_filtered.vcf" \ - "~{prefix}.minGQ_filtered.no_blanks.vcf" - - #extract and add MCNVs: - awk '{if ($7=="MULTIALLELIC") print}' "~{prefix}.minGQ_filtered.vcf" >> "~{prefix}.minGQ_filtered.no_blanks.vcf" - - #Bgzip & tabix - vcf-sort "~{prefix}.minGQ_filtered.no_blanks.vcf" | bgzip > "~{prefix}.minGQ_filtered.no_blanks.vcf.gz" - >>> - - output { - File merged_vcf = "~{prefix}.minGQ_filtered.no_blanks.vcf.gz" - } - - runtime { - preemptible: 1 - maxRetries: 1 - docker: "talkowski/sv-pipeline@sha256:193d18c26100fdd603c569346722513f5796685e990ec3abcaeb4be887062a1a" - disks: "local-disk 250 SSD" - memory: "4 GB" - } -} - -# Merge per-chromosome VCF shards -task combine_vcfs { - input{ - Array[File] vcfs - String prefix - - } - - command <<< - vcf-concat ~{sep=" " vcfs} | vcf-sort | bgzip -c > ~{prefix}.minGQ_filtered.vcf.gz; - tabix -p vcf ~{prefix}.minGQ_filtered.vcf.gz - >>> - - runtime { - preemptible: 0 - maxRetries: 1 - docker: "talkowski/sv-pipeline@sha256:193d18c26100fdd603c569346722513f5796685e990ec3abcaeb4be887062a1a" - disks: "local-disk 250 SSD" - memory: "4 GB" - } - - output { - File vcf="~{prefix}.minGQ_filtered.vcf.gz" - File vcf_idx="~{prefix}.minGQ_filtered.vcf.gz.tbi" - } -} - - - - - diff --git a/wdl/Module07XfBatchEffect.wdl b/wdl/Module07XfBatchEffect.wdl deleted file mode 100644 index b2148955e..000000000 --- 
a/wdl/Module07XfBatchEffect.wdl +++ /dev/null @@ -1,689 +0,0 @@ -########################## -## EXPERIMENTAL WORKFLOW -########################## - -version 1.0 - -import "prune_add_af.wdl" as calcAF -import "batch_effect_helper.wdl" as helper -import "TasksMakeCohortVcf.wdl" as MiniTasks - -workflow XfBatchEffect { - input{ - File vcf - File vcf_idx - File sample_batch_assignments - File batches_list - File sample_pop_assignments - File excludesamples_list #empty file if need be - File famfile - File contiglist - File? par_bed - Int variants_per_shard - Int? pairwise_cutoff=2 - Int? onevsall_cutoff=2 - String prefix - File af_pcrmins_premingq - String sv_pipeline_docker - - RuntimeAttr? runtime_attr_merge_labeled_vcfs - } - Array[String] batches = read_lines(batches_list) - Array[Array[String]] contigs = read_tsv(contiglist) - - # Shard VCF per batch, compute pops-specific AFs, and convert to table of VID & AF stats - scatter ( batch in batches ) { - # Get list of samples to include & exclude per batch - call GetBatchSamplesList { - input: - vcf=vcf, - vcf_idx=vcf_idx, - batch=batch, - sample_batch_assignments=sample_batch_assignments, - probands_list=excludesamples_list, - sv_pipeline_docker=sv_pipeline_docker - } - # Prune VCF to samples - call calcAF.prune_and_add_vafs as getAFs { - input: - vcf=vcf, - vcf_idx=vcf_idx, - prefix=batch, - sample_pop_assignments=sample_pop_assignments, - prune_list=GetBatchSamplesList.exclude_samples_list, - famfile=famfile, - sv_per_shard=25000, - contiglist=contiglist, - drop_empty_records="FALSE", - par_bed=par_bed, - sv_pipeline_docker=sv_pipeline_docker - } - # Get minimal table of AF data per batch, split by ancestry - call GetFreqTable { - input: - vcf=getAFs.output_vcf, - sample_pop_assignments=sample_pop_assignments, - prefix=batch, - sv_pipeline_docker=sv_pipeline_docker - } - } - - # Merge frequency results per batch into a single table of all variants with AF data across batches - call MergeFreqTables { - input: - tables=GetFreqTable.freq_data, - batches_list=batches_list, - prefix=prefix, - sv_pipeline_docker=sv_pipeline_docker - } - call MergeFreqTables as MergeFreqTables_allPops { - input: - tables=GetFreqTable.freq_data_allPops, - batches_list=batches_list, - prefix=prefix, - sv_pipeline_docker=sv_pipeline_docker - } - - # Compare frequencies before and after minGQ, and generate list of variants - # that are significantly different between the steps - call CompareFreqsPrePostMinGQPcrminus { - input: - af_pcrmins_premingq=af_pcrmins_premingq, - AF_postMinGQ_table=MergeFreqTables_allPops.merged_table, - prefix=prefix, - sv_pipeline_docker=sv_pipeline_docker - } - - # Generate matrix of correlation coefficients for all batches, by population & SVTYPE - #scatter ( pop in populations ) { - # call MakeCorrelationMatrices { - # input: - # freq_table=MergeFreqTables.merged_table, - # pop=pop, - # batches_list=batches_list, - # prefix=prefix, - # sv_pipeline_docker=sv_pipeline_docker - # } - #} - - # Make list of nonredundant pairs of batches to be evaluated - call MakeBatchPairsList { - input: - batches_list=batches_list, - prefix=prefix, - sv_pipeline_docker=sv_pipeline_docker - } - Array[Array[String]] batch_pairs = read_tsv(MakeBatchPairsList.batch_pairs_list) - - # Compute AF stats per pair of batches & determine variants with batch effects - scatter ( pair in batch_pairs ) { - call helper.check_batch_effects as check_batch_effects { - input: - freq_table=MergeFreqTables.merged_table, - batch1=pair[0], - batch2=pair[1], - prefix=prefix, - 
variants_per_shard=variants_per_shard, - sv_pipeline_docker=sv_pipeline_docker - } - } - # Collect results from pairwise batch effect detection - call MergeVariantFailureLists as merge_pairwise_checks { - input: - fail_variant_lists=check_batch_effects.batch_effect_variants, - prefix="~{prefix}.pairwise_comparisons", - sv_pipeline_docker=sv_pipeline_docker - } - - # Perform one-vs-all comparison of AFs per batch to find batch-specific sites - scatter ( batch in batches ) { - call helper.check_batch_effects as one_vs_all_comparison { - input: - freq_table=MergeFreqTables.merged_table, - batch1=batch, - batch2="ALL_OTHERS", - prefix=prefix, - variants_per_shard=variants_per_shard, - sv_pipeline_docker=sv_pipeline_docker - } - } - # Collect results from pairwise batch effect detection - call MergeVariantFailureLists as merge_one_vs_all_checks { - input: - fail_variant_lists=one_vs_all_comparison.batch_effect_variants, - prefix="~{prefix}.one_vs_all_comparisons", - sv_pipeline_docker=sv_pipeline_docker - } - - # Distill final table of variants to be reclassified - call MakeReclassificationTable { - input: - freq_table=MergeFreqTables.merged_table, - pairwise_fails=merge_pairwise_checks.fails_per_variant_all, - onevsall_fails=merge_one_vs_all_checks.fails_per_variant_all, - prefix=prefix, - pairwise_cutoff = pairwise_cutoff, - onevsall_cutoff = onevsall_cutoff, - sv_pipeline_docker=sv_pipeline_docker - } - - # Apply batch effect labels - scatter ( contig in contigs ) { - call ApplyBatchEffectLabels as apply_labels_perContig { - input: - vcf=vcf, - vcf_idx=vcf_idx, - contig=contig[0], - reclassification_table=MakeReclassificationTable.reclassification_table, - mingq_prePost_pcrminus_fails=CompareFreqsPrePostMinGQPcrminus.pcrminus_fails, - prefix="~{prefix}.~{contig[0]}", - sv_pipeline_docker=sv_pipeline_docker - } - } - call MiniTasks.ConcatVcfs as merge_labeled_vcfs { - input: - vcfs=apply_labels_perContig.labeled_vcf, - naive=true, - outfile_prefix="~{prefix}.batch_effects_labeled_merged", - sv_base_mini_docker=sv_pipeline_docker, - runtime_attr_override=runtime_attr_merge_labeled_vcfs - } - - output { - File labeled_vcf = merge_labeled_vcfs.concat_vcf - File labeled_vcf_idx = merge_labeled_vcfs.concat_vcf_idx - } -} - - -# Get list of samples to include & exclude per batch -# Always exclude probands from all batches -task GetBatchSamplesList { - input{ - File vcf - File vcf_idx - String batch - File sample_batch_assignments - File probands_list - String sv_pipeline_docker - RuntimeAttr? 
runtime_attr_override - } - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 4, - disk_gb: 50, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - command <<< - set -euo pipefail - # Get list of all samples present in VCF header - tabix -H ~{vcf} | fgrep -v "##" | cut -f10- | sed 's/\t/\n/g' | sort -Vk1,1 \ - > all_samples.list - # Get list of samples in batch - fgrep -w ~{batch} ~{sample_batch_assignments} | cut -f1 \ - | fgrep -wf - all_samples.list \ - | fgrep -wvf ~{probands_list} \ - > "~{batch}.samples.list" || true - # Get list of samples not in batch - fgrep -wv ~{batch} ~{sample_batch_assignments} | cut -f1 \ - | cat - ~{probands_list} | sort -Vk1,1 | uniq \ - | fgrep -wf - all_samples.list \ - > "~{batch}.exclude_samples.list" || true - >>> - - output { - File include_samples_list = "~{batch}.samples.list" - File exclude_samples_list = "~{batch}.exclude_samples.list" - } - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - - -# Run vcf2bed and subset to just include VID, SVTYPE, SVLEN, _AC, and _AN -task GetFreqTable { - input{ - File vcf - File sample_pop_assignments - String prefix - String sv_pipeline_docker - RuntimeAttr? runtime_attr_override - } - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 6, - disk_gb: 50, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - command <<< - set -euo pipefail - #Run vcf2bed - svtk vcf2bed \ - --info ALL \ - --no-samples \ - ~{vcf} "~{prefix}.vcf2bed.bed" - ### Create table of freqs by ancestry - #Cut to necessary columns - idxs=$( sed -n '1p' "~{prefix}.vcf2bed.bed" \ - | sed 's/\t/\n/g' \ - | awk -v OFS="\t" '{ print $1, NR }' \ - | grep -e 'name\|SVLEN\|SVTYPE\|_AC\|_AN\|_CN_NONREF_COUNT\|_CN_NUMBER' \ - | fgrep -v "OTH" \ - | cut -f2 \ - | paste -s -d\, || true ) - cut -f"$idxs" "~{prefix}.vcf2bed.bed" \ - | sed 's/^name/\#VID/g' \ - | gzip -c \ - > "~{prefix}.frequencies.preclean.txt.gz" - #Clean frequencies (note that this script automatically gzips the output file taken as the last argument) - /opt/sv-pipeline/scripts/downstream_analysis_and_filtering/clean_frequencies_table.R \ - "~{prefix}.frequencies.preclean.txt.gz" \ - "~{sample_pop_assignments}" \ - "~{prefix}.frequencies.txt" - ### Create table of freqs, irrespective of ancestry - #Cut to necessary columns - idxs=$( sed -n '1p' "~{prefix}.vcf2bed.bed" \ - | sed 's/\t/\n/g' \ - | awk -v OFS="\t" '{ if ($1=="name" || $1=="SVLEN" || $1=="SVTYPE" || $1=="AC" || $1=="AN" || $1=="CN_NUMBER" || $1=="CN_NONREF_COUNT") print NR }' \ - | paste -s -d\, || true ) - cut -f"$idxs" "~{prefix}.vcf2bed.bed" > minfreq.subset.bed - svtype_idx=$( sed -n '1p' minfreq.subset.bed \ - | sed 's/\t/\n/g' \ - | awk -v OFS="\t" '{ if ($1=="SVTYPE") print NR }' || true ) - awk -v OFS="\t" -v sidx="$svtype_idx" '{ if ($sidx=="CNV" || $sidx=="MCNV") print $1, $2, $3, $6, $7; else print $1, $2, $3, $4, 
$5 }' minfreq.subset.bed \ - | sed 's/^name/\#VID/g' \ - | gzip -c \ - > "~{prefix}.frequencies.allPops.txt.gz" - >>> - - output { - File freq_data = "~{prefix}.frequencies.txt.gz" - File freq_data_allPops = "~{prefix}.frequencies.allPops.txt.gz" - } - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - - -# Combine frequency data across batches -task MergeFreqTables { - input{ - Array[File] tables - File batches_list - String prefix - String sv_pipeline_docker - RuntimeAttr? runtime_attr_override - } - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 16, - disk_gb: 100, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - command <<< - set -euo pipefail - - #Get list of batch IDs and batch table paths - while read batch; do - echo "$batch" - find ./ -name "$batch.frequencies*txt.gz" | sed -n '1p' - done < ~{batches_list} | paste - - \ - > input.list - - #Make sure all input files have the same number of lines - while read batch file; do - zcat "$file" | wc -l - done < input.list > nlines.list - nlines=$( sort nlines.list | uniq | wc -l ) - if [ "$nlines" -gt 1 ]; then - echo "AT LEAST ONE INPUT FILE HAS A DIFFERENT NUMBER OF LINES" - exit 0 - fi - - #Prep files for paste joining - echo "PREPPING FILES FOR MERGING" - while read batch file; do - #Header - zcat "$file" | sed -n '1p' | cut -f1-3 - #Body - zcat "$file" | sed '1d' \ - | sort -Vk1,1 \ - | cut -f1-3 - done < <( sed -n '1p' input.list ) \ - > header.txt - while read batch file; do - for wrapper in 1; do - #Header - zcat "$file" | sed -n '1p' \ - | cut -f4- | sed 's/\t/\n/g' \ - | awk -v batch="$batch" '{ print $1"."batch }' \ - | paste -s - #Body - zcat "$file" | sed '1d' \ - | sort -Vk1,1 \ - | cut -f4- - done > "$batch.prepped.txt" - done < input.list - - #Join files with simple paste - paste \ - header.txt \ - $( awk -v ORS=" " '{ print $1".prepped.txt" }' input.list ) \ - | gzip -c \ - > "~{prefix}.merged_AF_table.txt.gz" - >>> - - output { - File merged_table = "~{prefix}.merged_AF_table.txt.gz" - } - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - - -# Compare -task CompareFreqsPrePostMinGQPcrminus { - input{ - File af_pcrmins_premingq - File AF_postMinGQ_table - String prefix - String sv_pipeline_docker - RuntimeAttr? 
runtime_attr_override - } - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 8, - disk_gb: 30, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - command <<< - set -euo pipefail - /opt/sv-pipeline/scripts/downstream_analysis_and_filtering/compare_freqs_pre_post_minGQ.PCRMinus_only.R \ - ~{af_pcrmins_premingq} \ - ~{AF_postMinGQ_table} \ - ./ \ - "~{prefix}." - >>> - - output { - File pcrminus_fails = "~{prefix}.PCRMINUS_minGQ_AF_prePost_fails.VIDs.list" - File minGQ_prePost_comparison_data = "~{prefix}.minGQ_AF_prePost_comparison.data.txt.gz" - File minGQ_prePost_comparison_plot = "~{prefix}.minGQ_AF_prePost_comparison.plot.png" - } - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - - -# Calculate & plot cross-batch correlation coefficient matrixes -task MakeCorrelationMatrices { - input{ - File freq_table - String pop - File batches_list - String prefix - String sv_pipeline_docker - RuntimeAttr? runtime_attr_override - } - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 8, - disk_gb: 50, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - command <<< - set -euo pipefail - /opt/sv-pipeline/scripts/downstream_analysis_and_filtering/correlate_batches_singlePop.R \ - ~{batches_list} \ - ~{freq_table} \ - "~{pop}" \ - "~{prefix}.~{pop}" - >>> - output { - Array[File] corr_matrixes = glob("~{prefix}.~{pop}.*.R2_matrix.txt") - Array[File] heat_maps = glob("~{prefix}.~{pop}.*heatmap*.pdf") - Array[File] dot_plots = glob("~{prefix}.~{pop}.*perBatch_R2_sina_plot.pdf") - } - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - - -# Generate list of all pairs of batches to be compared -task MakeBatchPairsList { - input{ - File batches_list - String prefix - String sv_pipeline_docker - RuntimeAttr? 
runtime_attr_override - } - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 4, - disk_gb: 10, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - command <<< - set -euo pipefail - /opt/sv-pipeline/scripts/downstream_analysis_and_filtering/make_batch_pairs_list.R \ - ~{batches_list} \ - "~{prefix}.nonredundant_batch_pairs.txt" - >>> - - output { - File batch_pairs_list = "~{prefix}.nonredundant_batch_pairs.txt" - } - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - - -# Merge lists of batch effect checks and count total number of times each variant failed -task MergeVariantFailureLists { - input{ - Array[File] fail_variant_lists - String prefix - String sv_pipeline_docker - RuntimeAttr? runtime_attr_override - } - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 4, - disk_gb: 10, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - command <<< - set -euo pipefail - #Write list of paths to all batch effect variant lists - #Get master list of PCR+ to PCR+ failures #removed from the PCR- only projects - #Get master list of PCR- to PCR- failures #removed from the PCR- only projects - #Get master list of PCR+ to PCR- failures #removed from the PCR- only projects - #Get master list of all possible failures - cat ~{write_lines(fail_variant_lists)} \ - | xargs -I {} cat {} \ - | sort -Vk1,1 | uniq -c \ - | awk -v OFS="\t" '{ print $2, $1 }' \ - > "~{prefix}.all.failures.txt" || true - >>> - - output { - File fails_per_variant_all = "~{prefix}.all.failures.txt" - } - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - - -# Consolidate all batch effect check results into a single table with reclassification per variant -task MakeReclassificationTable { - input{ - File freq_table - File pairwise_fails - File onevsall_fails - String prefix - Int? pairwise_cutoff - Int? onevsall_cutoff - String sv_pipeline_docker - RuntimeAttr? 
runtime_attr_override - } - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 8, - disk_gb: 10, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - command <<< - set -euo pipefail - /opt/sv-pipeline/scripts/downstream_analysis_and_filtering/make_batch_effect_reclassification_table.PCRMinus_only.R \ - ~{freq_table} \ - ~{pairwise_fails} \ - ~{onevsall_fails} \ - "~{prefix}.batch_effect_reclassification_table.txt" \ - ~{pairwise_cutoff} \ - ~{onevsall_cutoff} - >>> - - output { - File reclassification_table = "~{prefix}.batch_effect_reclassification_table.txt" - } - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - - -# Apply batch effect labels to VCF -task ApplyBatchEffectLabels { - input{ - File vcf - File vcf_idx - String contig - File reclassification_table - File mingq_prePost_pcrminus_fails - String prefix - String sv_pipeline_docker - RuntimeAttr? runtime_attr_override - } - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 4, - disk_gb: 50, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - command <<< - set -euo pipefail - tabix -h ~{vcf} ~{contig} \ - | /opt/sv-pipeline/scripts/downstream_analysis_and_filtering/label_batch_effects.PCRMinus_only.py \ - --unstable-af-pcrminus ~{mingq_prePost_pcrminus_fails} \ - stdin \ - ~{reclassification_table} \ - stdout \ - | bgzip -c \ - > "~{prefix}.batch_effects_labeled.vcf.gz" - tabix -p vcf -f "~{prefix}.batch_effects_labeled.vcf.gz" - >>> - - output { - File labeled_vcf = "~{prefix}.batch_effects_labeled.vcf.gz" - File labeled_vcf_idx = "~{prefix}.batch_effects_labeled.vcf.gz.tbi" - } - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} diff --git a/wdl/Module09VisualizeSingleSample.wdl b/wdl/Module09VisualizeSingleSample.wdl deleted file mode 100644 index f29d4575f..000000000 --- a/wdl/Module09VisualizeSingleSample.wdl +++ /dev/null @@ -1,135 +0,0 @@ -version 1.0 - -import "Structs.wdl" -import "RdTestVisualization.wdl" as rdtest -import "IGVTrioPlotsAllSamples.wdl" as igv_trio -import "IGVGeneratePlotsAllSamples.wdl" as igv_individual - -workflow Module09VisualizeSingleSample{ - input{ - File Fasta - File Fasta_idx - File Fasta_dict - - File varfile - File pedfile - String flags - String prefix - File batch_bincov - File sample_batches - - Array[File] medianfile - Array[String] sample_list - Array[File] cram_list - Array[File] crai_list - - 
String sv_base_mini_docker - String sv_pipeline_docker - String igv_docker - - RuntimeAttr? runtime_attr_override - RuntimeAttr? runtime_attr_concatinate - RuntimeAttr? runtime_attr_rdtest - } - - call rdtest.RdTestVisualization as RdTest{ - input: - flags = flags, - prefix = prefix, - bed = varfile, - pedfile = pedfile, - medianfile = medianfile, - batch_bincov=batch_bincov, - sample_batches = sample_batches, - sv_pipeline_docker=sv_pipeline_docker, - runtime_attr_rdtest=runtime_attr_rdtest - } - - call igv_individual.IGV_all_samples as igv_plots { - input: - prefix = prefix, - varfile = varfile, - Fasta = Fasta, - Fasta_dict = Fasta_dict, - Fasta_idx = Fasta_idx, - samples = sample_list, - crams = cram_list, - crams_idx = crai_list, - sv_base_mini_docker = sv_base_mini_docker, - igv_docker = igv_docker, - runtime_attr_override=runtime_attr_override - } - - call concatinate_plots{ - input: - rd_plots = RdTest.Plots, - igv_plots = igv_plots.tar_gz_pe, - prefix = prefix, - varfile = varfile, - pedfile = pedfile, - igv_docker = igv_docker, - runtime_attr_concatinate = runtime_attr_concatinate - } - - output{ - File concatinated_plots = concatinate_plots.plots - } -} - -task concatinate_plots{ - input{ - File rd_plots - File igv_plots - String prefix - File varfile - File pedfile - String igv_docker - RuntimeAttr? runtime_attr_concatinate - } - - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 7.5, - disk_gb: 10, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - - RuntimeAttr runtime_attr = select_first([runtime_attr_concatinate, default_attr]) - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: igv_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } - - command <<< - set -eu -o pipefail - - tar -zxf ~{rd_plots} - tar -zxf ~{igv_plots} - mkdir ~{prefix}_igv_rdtest_plots - echo 'test' - python3 /src/MakeRDtest.py \ - ~{varfile} \ - ~{pedfile} \ - ~{prefix} \ - 10000000 \ - ~{prefix}_igv_plots \ - ~{prefix}_rd_plots \ - ~{prefix}_igv_rdtest_plots - tar -czf ~{prefix}_igv_rdtest_plots.tar.gz ~{prefix}_igv_rdtest_plots - >>> - - output{ - File plots = "~{prefix}_igv_rdtest_plots.tar.gz" - } - - -} - diff --git a/wdl/Module09VisualizeTrio.wdl b/wdl/Module09VisualizeTrio.wdl deleted file mode 100755 index b53cb646f..000000000 --- a/wdl/Module09VisualizeTrio.wdl +++ /dev/null @@ -1,143 +0,0 @@ -version 1.0 - -import "Structs.wdl" -import "RdTestVisualization.wdl" as rdtest -import "IGVTrioPlotsAllSamples.wdl" as igv_trio - -workflow Module09VisualizeTrio{ - input{ - File Fasta - File Fasta_idx - File Fasta_dict - - File varfile - File pedfile - String flags - String prefix - File batch_bincov - File sample_batches - - Array[File] medianfile - Array[String] pb_list - Array[String] fa_list - Array[String] mo_list - Array[File] pb_cram_list - Array[File] pb_crai_list - Array[File] fa_cram_list - Array[File] fa_crai_list - Array[File] mo_cram_list - Array[File] mo_crai_list - - String sv_base_mini_docker - String sv_pipeline_docker - String igv_docker - - RuntimeAttr? runtime_attr_override - RuntimeAttr? runtime_attr_concatinate - RuntimeAttr? 
runtime_attr_rdtest - } - call rdtest.RdTestVisualization as RdTest{ - input: - prefix = prefix, - medianfile = medianfile, - pedfile = pedfile, - batch_bincov=batch_bincov, - bed = varfile, - sv_pipeline_docker=sv_pipeline_docker, - sample_batches = sample_batches, - flags = flags, - runtime_attr_rdtest=runtime_attr_rdtest - - } - call igv_trio.IGV_all_samples as igv_plots { - input: - pb_list = pb_list, - fa_list = fa_list, - mo_list = mo_list, - pb_cram_list = pb_cram_list, - pb_crai_list = pb_crai_list, - fa_cram_list = fa_cram_list, - fa_crai_list = fa_crai_list, - mo_cram_list = mo_cram_list, - mo_crai_list = mo_crai_list, - varfile = varfile, - Fasta = Fasta, - Fasta_dict = Fasta_dict, - Fasta_idx = Fasta_idx, - prefix = prefix, - sv_base_mini_docker = sv_base_mini_docker, - igv_docker = igv_docker, - runtime_attr_override=runtime_attr_override - } - call concatinate_plots{ - input: - rd_plots = RdTest.Plots, - igv_plots = igv_plots.tar_gz_pe, - prefix = prefix, - varfile = varfile, - pedfile = pedfile, - igv_docker = igv_docker, - runtime_attr_concatinate = runtime_attr_concatinate - } - output{ - File concatinated_plots = concatinate_plots.plots - } -} - -task concatinate_plots{ - input{ - File rd_plots - File igv_plots - String prefix - File varfile - File pedfile - String igv_docker - RuntimeAttr? runtime_attr_concatinate - } - - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 7.5, - disk_gb: 10, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - - RuntimeAttr runtime_attr = select_first([runtime_attr_concatinate, default_attr]) - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: igv_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } - - command <<< - set -eu -o pipefail - - tar -zxf ~{rd_plots} - tar -zxf ~{igv_plots} - mkdir ~{prefix}_igv_rdtest_plots - echo 'test' - python3 /src/MakeRDtest.py \ - ~{varfile} \ - ~{pedfile} \ - ~{prefix} \ - 10000000 \ - ~{prefix}_igv_plots \ - ~{prefix}_rd_plots/ \ - ~{prefix}_igv_rdtest_plots - tar -czf ~{prefix}_igv_rdtest_plots.tar.gz ~{prefix}_igv_rdtest_plots - >>> - - output{ - File plots = "~{prefix}_igv_rdtest_plots.tar.gz" - } - - -} - diff --git a/wdl/Module10AnnotateILFeatures.wdl b/wdl/Module10AnnotateILFeatures.wdl deleted file mode 100644 index 2eb3ca562..000000000 --- a/wdl/Module10AnnotateILFeatures.wdl +++ /dev/null @@ -1,238 +0,0 @@ -version 1.0 - -import "Structs.wdl" -import "AnnotateILFeatures.wdl" as anno_il - -workflow Module10AnnoILFeatures { - input { - File cleanVcf - - Array[String] prefix - Array[String] samples - Array[String] il_bams - Array[String] il_bam_bais - - Array[File] pe_metrics - Array[File] pe_indexes - Array[File] sr_metrics - Array[File] sr_indexes - Array[File] rd_metrics - Array[File] rd_indexes - Array[File] raw_mantas - Array[File] raw_whams - Array[File] raw_melts - - File denovo_list - File ref_SegDup - File ref_SimpRep - File ref_RepMask - - File ref_fasta - File ref_fai - File ref_dict - File contig_list - - String rdpesr_benchmark_docker - String vapor_docker - String duphold_docker - String sv_base_mini_docker - String sv_pipeline_docker 
- - RuntimeAttr? runtime_attr_Vapor - RuntimeAttr? runtime_attr_duphold - RuntimeAttr? runtime_attr_rdpesr - RuntimeAttr? runtime_attr_bcf2vcf - RuntimeAttr? runtime_attr_LocalizeCram - RuntimeAttr? runtime_attr_vcf2bed - RuntimeAttr? runtime_attr_SplitVcf - RuntimeAttr? runtime_attr_ConcatBeds - RuntimeAttr? runtime_attr_ConcatVcfs - RuntimeAttr? runtime_inte_anno - } - - scatter(i in range(length(prefix))){ - call split_per_sample_vcf{ - input: - vcf = cleanVcf, - sample = samples[i], - sv_pipeline_docker = sv_pipeline_docker - } - call anno_il.AnnoILFeatures as anno_il_features{ - input: - prefix = samples[i], - il_bam = il_bams[i], - il_bam_bai = il_bam_bais[i], - vcf_file = split_per_sample_vcf.vcf_file, - pe_matrix = pe_metrics[i], - pe_index = pe_indexes[i], - sr_matrix = sr_metrics[i], - sr_index = sr_indexes[i], - rd_matrix = rd_metrics[i], - rd_index = rd_indexes[i], - ref_SegDup = ref_SegDup, - ref_SimpRep = ref_SimpRep, - ref_RepMask = ref_RepMask, - ref_fasta = ref_fasta, - ref_fai = ref_fai, - ref_dict = ref_dict, - contig_list = contig_list, - raw_vcfs = [raw_mantas[i],raw_whams[i],raw_melts[i]], - raw_algorithms = ["manta","wham","melt"], - - rdpesr_benchmark_docker = rdpesr_benchmark_docker, - vapor_docker = vapor_docker, - duphold_docker = duphold_docker, - sv_base_mini_docker = sv_base_mini_docker, - sv_pipeline_docker = sv_pipeline_docker, - - runtime_attr_Vapor = runtime_attr_Vapor, - runtime_attr_duphold = runtime_attr_duphold, - runtime_attr_rdpesr = runtime_attr_rdpesr, - runtime_attr_bcf2vcf = runtime_attr_bcf2vcf, - runtime_attr_LocalizeCram = runtime_attr_LocalizeCram, - runtime_attr_vcf2bed = runtime_attr_vcf2bed, - runtime_attr_SplitVcf = runtime_attr_SplitVcf, - runtime_attr_ConcatBeds = runtime_attr_ConcatBeds, - runtime_attr_ConcatVcfs = runtime_attr_ConcatVcfs - } - - call IntegrateAnno{ - input: - prefix = prefix[i], - sample = samples[i], - gc_anno = anno_il_features.GCAnno, - duphold_il = anno_il_features.duphold_vcf_il, - duphold_il_le = anno_il_features.duphold_vcf_il_le, - duphold_il_ri = anno_il_features.duphold_vcf_il_ri, - pesr_anno = anno_il_features.PesrAnno, - rd_anno = anno_il_features.RdAnno, - rd_le_anno = anno_il_features.RdAnno_le, - rd_ri_anno = anno_il_features.RdAnno_ri, - gt_anno = anno_il_features.GTGQ, - info_anno = anno_il_features.vcf_info, - raw_manta = anno_il_features.vs_raw[0], - raw_wham = anno_il_features.vs_raw[1], - raw_melt = anno_il_features.vs_raw[2], - de_novo = denovo_list, - rdpesr_benchmark_docker = rdpesr_benchmark_docker, - runtime_attr_override = runtime_inte_anno - } - } - - output{ - Array[File] integrated_file = IntegrateAnno.anno_file - } -} - - -task split_per_sample_vcf{ - input{ - File vcf - String sample - String sv_pipeline_docker - RuntimeAttr? 
runtime_attr_override - } - - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 3.75, - disk_gb: 10, - boot_disk_gb: 10, - preemptible_tries: 1, - max_retries: 1 - } - - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - - - output { - File vcf_file = "~{sample}.vcf.gz" - File vcf_idx ="~{sample}.vcf.gz.tbi" - } - command <<< - - set -Eeuo pipefail - - bcftools view -s ~{sample} ~{vcf} | grep -v "0/0" | bgzip > ~{sample}.vcf.gz - tabix -p vcf ~{sample}.vcf.gz - - >>> - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - -task IntegrateAnno{ - input{ - File gc_anno - File duphold_il - File duphold_il_le - File duphold_il_ri - File rd_anno - File rd_le_anno - File rd_ri_anno - File pesr_anno - File info_anno - File gt_anno - File raw_manta - File raw_wham - File raw_melt - File de_novo - String prefix - String sample - String rdpesr_benchmark_docker - RuntimeAttr? runtime_attr_override - } - - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 3.75, - disk_gb: 10, - boot_disk_gb: 10, - preemptible_tries: 1, - max_retries: 1 - } - - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - - output{ - File anno_file = "~{prefix}.anno.bed.gz" - } - - command <<< - zcat ~{rd_anno} | grep ~{sample} > tmp.rd_anno - zcat ~{pesr_anno} | grep ~{sample} > tmp.pesr_anno - Rscript /src/integrate_annotations.R \ - --gc_anno ~{gc_anno} \ - --duphold_il ~{duphold_il} \ - --duphold_il_le ~{duphold_il_le} \ - --duphold_il_ri ~{duphold_il_ri} \ - --rd_le ~{rd_le_anno} \ - --rd_ri ~{rd_ri_anno} \ - --rd tmp.rd_anno \ - --pesr tmp.pesr_anno \ - --info ~{info_anno} \ - --gt ~{gt_anno} \ - --raw_manta ~{raw_manta} \ - --raw_wham ~{raw_wham} \ - --raw_melt ~{raw_melt} \ - --denovo ~{de_novo} \ - --output ~{prefix}.anno.bed - bgzip ~{prefix}.anno.bed - >>> - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: rdpesr_benchmark_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} diff --git a/wdl/Module10AnnotateRdPeSr.wdl b/wdl/Module10AnnotateRdPeSr.wdl deleted file mode 100644 index 6ef5c0299..000000000 --- a/wdl/Module10AnnotateRdPeSr.wdl +++ /dev/null @@ -1,70 +0,0 @@ -version 1.0 - -import "Structs.wdl" -import "AnnoRdPeSr.wdl" as anno_pesrrd - -workflow Module10AnnotateRdPeSr { - input { - Array[String] prefixes - Array[String] samples - - Array[File] beds - Array[File] bed_le_flanks - Array[File] bed_ri_flanks - - Array[File] pe_metrics - Array[File] pe_indexes - Array[File] sr_metrics - Array[File] sr_indexes - Array[File] rd_metrics - Array[File] rd_indexes - - File contig_list - - String rdpesr_benchmark_docker - String 
sv_base_mini_docker - String sv_pipeline_docker - - RuntimeAttr? runtime_attr_Vapor - RuntimeAttr? runtime_attr_duphold - RuntimeAttr? runtime_attr_rdpesr - RuntimeAttr? runtime_attr_bcf2vcf - RuntimeAttr? runtime_attr_LocalizeCram - RuntimeAttr? runtime_attr_vcf2bed - RuntimeAttr? runtime_attr_SplitVcf - RuntimeAttr? runtime_attr_ConcatBeds - RuntimeAttr? runtime_attr_ConcatVcfs - RuntimeAttr? runtime_inte_anno - } - - scatter(i in range(length(prefixes))){ - call anno_pesrrd.AnnoRdPeSr as anno_rd_pe_sr{ - input: - prefix = prefixes[i], - sample = samples[i], - bed = beds[i], - bed_le_flank = bed_le_flanks[i], - bed_ri_flank = bed_ri_flanks[i], - pe_matrix = pe_metrics[i], - pe_index = pe_indexes[i], - sr_matrix = sr_metrics[i], - sr_index = sr_indexes[i], - rd_matrix = rd_metrics[i], - rd_index = rd_indexes[i], - contig_list = contig_list, - rdpesr_benchmark_docker=rdpesr_benchmark_docker, - sv_base_mini_docker = sv_base_mini_docker, - sv_pipeline_docker = sv_pipeline_docker - - } - } - - output{ - Array[File] rd_anno = anno_rd_pe_sr.RdAnno - Array[File] rd_anno_le = anno_rd_pe_sr.RdAnno_le - Array[File] rd_anno_ri = anno_rd_pe_sr.RdAnno_ri - Array[File] pesr_anno = anno_rd_pe_sr.PesrAnno - } -} - - diff --git a/wdl/Module10Benchmark.wdl b/wdl/Module10Benchmark.wdl deleted file mode 100644 index dae6f999f..000000000 --- a/wdl/Module10Benchmark.wdl +++ /dev/null @@ -1,550 +0,0 @@ -version 1.0 - -import "Structs.wdl" -import "TasksMakeCohortVcf.wdl" as MiniTasks -import "TasksBenchmark.wdl" as tasks10 - -# WARNING: This workflow is potentially very expensive! Start small and scale gradually, or consider running the -# subworkflows separately. - -workflow BenchmarkAnnotation { - input { - String prefix - String il_bam - String il_bam_bai - String pb_bam - String pb_bam_bai - File vcf_file - - File pe_metrics - File sr_metrics - File rd_metrics - - File ref_fasta - File ref_fai - File ref_dict - File contig_list - - String pacbio_benchmark_docker - String vapor_docker - String duphold_docker - String sv_base_mini_docker - String sv_pipeline_docker - - RuntimeAttr? runtime_attr_Vapor - RuntimeAttr? runtime_attr_duphold - RuntimeAttr? runtime_attr_rdpesr - RuntimeAttr? runtime_attr_bcf2vcf - RuntimeAttr? runtime_attr_LocalizeCram - RuntimeAttr? runtime_attr_vcf2bed - RuntimeAttr? runtime_attr_SplitVcf - RuntimeAttr? runtime_attr_ConcatBeds - RuntimeAttr? 
runtime_attr_ConcatVcfs - - } - - Array[String] contigs = transpose(read_tsv(contig_list))[0] - scatter ( contig in contigs ) { - - call tasks10.LocalizeCram as LocalizeCramPB{ - input: - contig = contig, - ref_fasta=ref_fasta, - ref_fai=ref_fai, - ref_dict=ref_dict, - bam_or_cram_file=pb_bam, - bam_or_cram_index=pb_bam_bai, - sv_pipeline_docker=sv_pipeline_docker, - runtime_attr_override=runtime_attr_LocalizeCram - } - - call tasks10.SplitVcf as SplitVcf{ - input: - contig = contig, - vcf_file = vcf_file, - sv_pipeline_docker=sv_pipeline_docker, - runtime_attr_override=runtime_attr_SplitVcf - } - - call tasks10.vcf2bed as vcf2bed{ - input: - vcf = SplitVcf.contig_vcf, - vcf_index = SplitVcf.contig_vcf_index, - sv_pipeline_docker = sv_pipeline_docker, - runtime_attr_override = runtime_attr_vcf2bed - } - - call RunDupholdPerContig as RunDupholdPerContigPB{ - input: - prefix = prefix, - contig = contig, - bam_or_cram_file=LocalizeCramPB.local_bam, - bam_or_cram_index=LocalizeCramPB.local_bai, - vcf_file = SplitVcf.contig_vcf, - vcf_index = SplitVcf.contig_vcf_index, - ref_fasta = ref_fasta, - ref_fai = ref_fai, - ref_dict = ref_dict, - pacbio_benchmark_docker = duphold_docker, - runtime_attr_override = runtime_attr_duphold - } - - call Bcf2Vcf as Bcf2VcfPB{ - input: - prefix = prefix, - contig = contig, - bcf = RunDupholdPerContigPB.bcf, - sv_base_mini_docker = sv_base_mini_docker, - runtime_attr_override = runtime_attr_bcf2vcf - } - - call RunVapor{ - input: - prefix = prefix, - contig = contig, - bam_or_cram_file=LocalizeCramPB.local_bam, - bam_or_cram_index=LocalizeCramPB.local_bai, - bed = vcf2bed.bed, - ref_fasta = ref_fasta, - ref_fai = ref_fai, - ref_dict = ref_dict, - pacbio_benchmark_docker = vapor_docker, - runtime_attr_override = runtime_attr_Vapor - } - - call RunRdPeSrAnnotation{ - input: - prefix = prefix, - contig = contig, - bam_or_cram_file=LocalizeCramPB.local_bam, - bam_or_cram_index=LocalizeCramPB.local_bai, - bed = vcf2bed.bed, - pe_metrics = pe_metrics, - sr_metrics = sr_metrics, - rd_metrics = rd_metrics, - ref_fasta = ref_fasta, - ref_fai = ref_fai, - ref_dict=ref_dict, - pacbio_benchmark_docker = pacbio_benchmark_docker, - runtime_attr_override = runtime_attr_rdpesr - } - - call tasks10.LocalizeCramRequestPay as LocalizeCramIL{ - input: - contig = contig, - ref_fasta=ref_fasta, - ref_fai=ref_fai, - ref_dict=ref_dict, - project_id="talkowski-sv-gnomad", - bam_or_cram_file=il_bam, - bam_or_cram_index=il_bam_bai, - sv_pipeline_docker=sv_pipeline_docker, - runtime_attr_override=runtime_attr_LocalizeCram - } - - call RunDupholdPerContig as RunDupholdPerContigIL{ - input: - prefix = prefix, - contig = contig, - bam_or_cram_file=LocalizeCramIL.local_bam, - bam_or_cram_index=LocalizeCramIL.local_bai, - vcf_file = SplitVcf.contig_vcf, - vcf_index = SplitVcf.contig_vcf_index, - ref_fasta = ref_fasta, - ref_fai = ref_fai, - ref_dict = ref_dict, - pacbio_benchmark_docker = duphold_docker, - runtime_attr_override = runtime_attr_duphold - } - - call Bcf2Vcf as Bcf2VcfIL{ - input: - prefix = prefix, - contig = contig, - bcf = RunDupholdPerContigIL.bcf, - sv_base_mini_docker = sv_base_mini_docker, - runtime_attr_override = runtime_attr_bcf2vcf - } - } - - call MiniTasks.ConcatVcfs as ConcatVcfsPB{ - input: - vcfs=Bcf2VcfPB.vcf, - outfile_prefix="~{prefix}.PB", - sv_base_mini_docker=sv_base_mini_docker, - runtime_attr_override=runtime_attr_ConcatVcfs - } - - call MiniTasks.ConcatVcfs as ConcatVcfsIL{ - input: - vcfs=Bcf2VcfIL.vcf, - outfile_prefix="~{prefix}.PB", - 
sv_base_mini_docker=sv_base_mini_docker, - runtime_attr_override=runtime_attr_ConcatVcfs - } - - call MiniTasks.ConcatBeds as ConcatBeds{ - input: - shard_bed_files=RunVapor.vapor, - prefix=prefix, - sv_base_mini_docker=sv_base_mini_docker, - runtime_attr_override=runtime_attr_ConcatBeds - } - - call MiniTasks.ConcatBeds as ConcatPesrAnno{ - input: - shard_bed_files=RunRdPeSrAnnotation.pesr_anno, - prefix=prefix, - sv_base_mini_docker=sv_base_mini_docker, - runtime_attr_override=runtime_attr_ConcatBeds - } - - call MiniTasks.ConcatBeds as ConcatRdAnno{ - input: - shard_bed_files=RunRdPeSrAnnotation.cov, - prefix=prefix, - sv_base_mini_docker=sv_base_mini_docker, - runtime_attr_override=runtime_attr_ConcatBeds - } - - call MiniTasks.ConcatBeds as ConcatRdAnnoLeFlank{ - input: - shard_bed_files=RunRdPeSrAnnotation.cov_le_flank, - prefix=prefix, - sv_base_mini_docker=sv_base_mini_docker, - runtime_attr_override=runtime_attr_ConcatBeds - } - - call MiniTasks.ConcatBeds as ConcatRdAnnoRiFlank{ - input: - shard_bed_files=RunRdPeSrAnnotation.cov_ri_flank, - prefix=prefix, - sv_base_mini_docker=sv_base_mini_docker, - runtime_attr_override=runtime_attr_ConcatBeds - } - - output{ - File duphold_vcf_pb = ConcatVcfsPB.concat_vcf - File duphold_vcf_il = ConcatVcfsIL.concat_vcf - File vapor_bed = ConcatBeds.merged_bed_file - File PesrAnno = ConcatPesrAnno.merged_bed_file - File RdAnno = ConcatRdAnno.merged_bed_file - File RdAnnoLeFlank = ConcatRdAnnoLeFlank.merged_bed_file - File RdAnnoRiFlank = ConcatRdAnnoRiFlank.merged_bed_file - } - } - -task RunDupholdPerContig{ - input{ - String prefix - String contig - File bam_or_cram_file - File bam_or_cram_index - File vcf_file - File vcf_index - File ref_fasta - File ref_fai - File ref_dict - String pacbio_benchmark_docker - RuntimeAttr? runtime_attr_override - } - - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 10, - disk_gb: 100, - boot_disk_gb: 10, - preemptible_tries: 0, - max_retries: 1 - } - - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - - - output { - File bcf = "~{prefix}.~{contig}.bcf" - } - command <<< - - set -Eeuo pipefail - - duphold -t 4 \ - -v ~{vcf_file} \ - -b ~{bam_or_cram_file} \ - -f ~{ref_fasta} \ - -o ~{prefix}.~{contig}.bcf - - >>> - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: pacbio_benchmark_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - -task vcf2bed{ - input{ - File vcf - File vcf_index - String sv_pipeline_docker - RuntimeAttr? 
runtime_attr_override - } - - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 10, - disk_gb: 100, - boot_disk_gb: 10, - preemptible_tries: 0, - max_retries: 1 - } - - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - String filename = basename(vcf, ".vcf.gz") - - output { - File bed = "${filename}.bed" - } - - command <<< - - set -Eeuo pipefail - - svtk vcf2bed -i SVTYPE -i SVLEN ~{vcf} tmp1.bed - - cat \ - <(awk '{if ($5=="DEL") print}' tmp1.bed | cut -f1-5) \ - <(awk '{if ($5=="DUP") print}' tmp1.bed | cut -f1-5) \ - <(awk '{if ($5=="INV") print}' tmp1.bed | cut -f1-5) \ - > ${filename}.bed - - paste -d '_' \ - <(awk '{if ($5=="INS") print}' tmp1.bed | cut -f1-5) \ - <(awk '{if ($5=="INS") print}' tmp1.bed | cut -f6) \ - >> ${filename}.bed - - >>> - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - -task Bcf2Vcf{ - input{ - String prefix - String contig - File bcf - String sv_base_mini_docker - RuntimeAttr? runtime_attr_override - } - - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 3.75, - disk_gb: 5, - boot_disk_gb: 5, - preemptible_tries: 0, - max_retries: 1 - } - - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - - output{ - File vcf = "~{prefix}.~{contig}.duphold.vcf.gz" - } - command <<< - set -Eeuo pipefail - bcftools view ~{bcf} | bgzip > ~{prefix}.~{contig}.duphold.vcf.gz - >>> - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_base_mini_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - -task RunDuphold{ - input{ - String prefix - File bam_or_cram_file - File bam_or_cram_index - File vcf_file - File ref_fasta - File ref_fai - File ref_dict - String pacbio_benchmark_docker - RuntimeAttr? 
runtime_attr_override - } - - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 10, - disk_gb: 100, - boot_disk_gb: 10, - preemptible_tries: 0, - max_retries: 1 - } - - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - - - output { - File bcf = "~{prefix}.bcf" - } - command <<< - - set -Eeuo pipefail - - duphold -t 4 \ - -v ~{vcf_file} \ - -b ~{bam_or_cram_file} \ - -f ~{ref_fasta} \ - -o ~{prefix}.bcf - - >>> - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: pacbio_benchmark_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - -task RunVapor{ - input{ - String prefix - String contig - File bam_or_cram_file - File bam_or_cram_index - File bed - File ref_fasta - File ref_fai - File ref_dict - String pacbio_benchmark_docker - RuntimeAttr? runtime_attr_override - } - - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 3.75, - disk_gb: 5, - boot_disk_gb: 10, - preemptible_tries: 0, - max_retries: 1 - } - - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - - output { - File vapor = "~{bed}.vapor" - File vapor_plot = "~{prefix}.~{contig}.tar.gz" - } - - command <<< - - set -Eeuo pipefail - - mkdir ~{prefix}.~{contig} - - vapor bed \ - --sv-input ~{bed} \ - --output-path ~{prefix}.~{contig} \ - --reference ~{ref_fasta} \ - --pacbio-input ~{bam_or_cram_index} \ - - tar -czf ~{prefix}.~{contig}.tar.gz ~{prefix}.~{contig} - >>> - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: pacbio_benchmark_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - -task RunRdPeSrAnnotation{ - input{ - String prefix - String contig - File bam_or_cram_file - File bam_or_cram_index - File bed - File pe_metrics - File sr_metrics - File rd_metrics - File ref_fasta - File ref_fai - File ref_dict - String pacbio_benchmark_docker - RuntimeAttr? 
runtime_attr_override - } - - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 3.75, - disk_gb: 5, - boot_disk_gb: 10, - preemptible_tries: 0, - max_retries: 1 - } - - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - String filename = basename(bed, '.bed') - - output { - File pesr_anno = "~{bed}.INS_with_SR_PE" - File cov = "~{bed}.Seq_Cov" - File cov_ri_flank = "~{bed}.ri_flank.Seq_Cov" - File cov_le_flank = "~{bed}.le_flank.Seq_Cov" - } - - command <<< - - set -Eeuo pipefail - Rscript /modify_bed_for_PE_SR_RD_labeling.R -i ~{bed} - - python /add_SR_PE_to_PB_INS.V2.py ~{bed} ~{pe_metrics} ~{sr_metrics} - - zcat ~{rd_metrics} | grep -v '@' | grep -v CONTIG |bgzip > bincov.tsv.gz - Rscript /bincov_to_normCov.R -i bincov.tsv.gz - bgzip normCov.tsv - tabix normCov.tsv.gz - - python /add_RD_to_SVs.py ~{bed} normCov.tsv.gz - python /add_RD_to_SVs.py ~{filename}.ri_flank normCov.tsv.gz - python /add_RD_to_SVs.py ~{filename}.le_flank normCov.tsv.gz - - >>> - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: pacbio_benchmark_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - - - diff --git a/wdl/Mosaic.wdl b/wdl/Mosaic.wdl deleted file mode 100644 index 04efcf8c3..000000000 --- a/wdl/Mosaic.wdl +++ /dev/null @@ -1,193 +0,0 @@ -########################## -## EXPERIMENTAL WORKFLOW -########################## - -# To obtains list of likely mosaic variants that failed RF due to separation only - -version 1.0 - -import "Structs.wdl" -import "TasksMakeCohortVcf.wdl" as MiniTasks -import "PreRFCohort.wdl" as preRF -import "MosaicDepth.wdl" as depth_mosaic -import "MosaicPesrPart1.wdl" as mosaic_pesr_part1 -import "MosaicPesrPart2.wdl" as mosaic_pesr_part2 - -workflow MosaicManualCheck{ - input{ - File fam_file - Int rare_cutoff - File outlier - String prefix - - Array[File] per_batch_clustered_pesr_vcf_list # preRF - Array[File] clustered_depth_vcfs - Array[File] coverage_files - Array[File] coverage_file_idxs - Array[File] median_files - - Array[File] agg_metrics - Array[File] RF_cutoffs - - String sv_pipeline_docker - String sv_base_mini_docker - - RuntimeAttr? runtime_attr_concat_depth_bed - RuntimeAttr? runtime_attr_concat_pesr_bed - RuntimeAttr? runtime_attr_concat_depth_plot - RuntimeAttr? 
runtime_attr_concat_pesr_plot - } - scatter (i in range(length(per_batch_clustered_pesr_vcf_list))) { - call mosaic_pesr_part1.Mosaic as pesr1{ - input: - name=basename(clustered_depth_vcfs[i]), - pesr_vcfs=read_lines(per_batch_clustered_pesr_vcf_list[i]), - metrics=agg_metrics[i], - cutoffs=RF_cutoffs[i], - coverage_file=coverage_files[i], - coverage_file_idx=coverage_file_idxs[i], - fam_file=fam_file, - median_file=median_files[i], - sv_pipeline_docker=sv_pipeline_docker - - } - } - scatter (i in range(length(clustered_depth_vcfs))) { - call depth_mosaic.Mosaic as depth{ - input: - name=basename(clustered_depth_vcfs[i]), - metrics=agg_metrics[i], - cutoffs=RF_cutoffs[i], - rare_cutoff=rare_cutoff, - depth_vcf=clustered_depth_vcfs[i], - lookup=LookupGen.depthlookup, - coverage_file=coverage_files[i], - coverage_file_idx=coverage_file_idxs[i], - fam_file=fam_file, - median_file=median_files[i], - sv_pipeline_docker=sv_pipeline_docker - - } - } - call preRF.make_cohort_VCFs as LookupGen { - input: - pesr_vcfs = pesr1.merged_pesr, - depth_vcfs = clustered_depth_vcfs, - sv_pipeline_docker=sv_pipeline_docker - } - scatter (i in range(length(pesr1.common_potential))) { - call mosaic_pesr_part2.Mosaic as pesr2{ - input: - name=basename(pesr1.common_potential[i]), - outlier=outlier, - rare_cutoff=rare_cutoff, - lookup=LookupGen.pesrlookup, - potential=pesr1.common_potential[i], - coverage_file=coverage_files[i], - coverage_file_idx=coverage_file_idxs[i], - fam_file=fam_file, - median_file=median_files[i], - sv_pipeline_docker=sv_pipeline_docker - } - } - - call MiniTasks.ConcatBeds as concat_depth_bed{ - input: - shard_bed_files = depth.rare_potential, - prefix = "~{prefix}.depth", - sv_base_mini_docker = sv_base_mini_docker, - runtime_attr_override = runtime_attr_concat_depth_bed - } - - call MiniTasks.ConcatBeds as concat_pesr_bed{ - input: - shard_bed_files = pesr2.potentialmosaic, - prefix = "~{prefix}.pesr", - sv_base_mini_docker = sv_base_mini_docker, - runtime_attr_override = runtime_attr_concat_pesr_bed - } - - call ConcatPlots as concat_depth_plots{ - input: - shard_plots = depth.igvplots, - prefix = "~{prefix}.depth", - sv_base_mini_docker = sv_base_mini_docker, - runtime_attr_override = runtime_attr_concat_depth_plot - } - - call ConcatPlots as concat_pesr_plots{ - input: - shard_plots = pesr2.igvplots, - prefix = "~{prefix}.pesr", - sv_base_mini_docker = sv_base_mini_docker, - runtime_attr_override = runtime_attr_concat_pesr_plot - } - - output{ - File depth_bed = concat_depth_bed.merged_bed_file - File pesr_bed = concat_pesr_bed.merged_bed_file - File depth_plots = concat_depth_plots.merged_plots - File pesr_plots = concat_pesr_plots.merged_plots - } -} - - -# Merge plots from each shard: -task ConcatPlots { - input{ - Array[File] shard_plots - String prefix - String sv_base_mini_docker - RuntimeAttr? 
runtime_attr_override - } - - String output_file="~{prefix}.plot.tar.gz" - - Float input_size = size(shard_plots, "GB") - Float compression_factor = 5.0 - Float base_disk_gb = 5.0 - Float base_mem_gb = 2.0 - RuntimeAttr runtime_default = object { - mem_gb: base_mem_gb + compression_factor * input_size, - disk_gb: ceil(base_disk_gb + input_size * (2.0 + compression_factor)), - cpu_cores: 1, - preemptible_tries: 3, - max_retries: 1, - boot_disk_gb: 10 - } - - RuntimeAttr runtime_override = select_first([runtime_attr_override, runtime_default]) - - runtime { - memory: "~{select_first([runtime_override.mem_gb, runtime_default.mem_gb])} GB" - disks: "local-disk ~{select_first([runtime_override.disk_gb, runtime_default.disk_gb])} HDD" - cpu: select_first([runtime_override.cpu_cores, runtime_default.cpu_cores]) - preemptible: select_first([runtime_override.preemptible_tries, runtime_default.preemptible_tries]) - maxRetries: select_first([runtime_override.max_retries, runtime_default.max_retries]) - docker: sv_base_mini_docker - bootDiskSizeGb: select_first([runtime_override.boot_disk_gb, runtime_default.boot_disk_gb]) - } - - command <<< - set -eu - - # note head -n1 stops reading early and sends SIGPIPE to zcat, - # so setting pipefail here would result in early termination - mkdir output_folder/ - - # no more early stopping - set -o pipefail - - while read SPLIT; do - tar zxvf $SPLIT - mv plots/* output_folder/ - done < ~{write_lines(shard_plots)} \ - - tar zcvf ~{prefix}.plots.tar.gz output_folder/ - >>> - - output { - File merged_plots = "~{prefix}.plots.tar.gz" - } -} - diff --git a/wdl/MosaicDepth.wdl b/wdl/MosaicDepth.wdl deleted file mode 100644 index 74a4c28f8..000000000 --- a/wdl/MosaicDepth.wdl +++ /dev/null @@ -1,169 +0,0 @@ -version 1.0 - -import "Structs.wdl" - -workflow Mosaic{ - input{ - String name - Int rare_cutoff - File metrics - File cutoffs - File depth_vcf - File lookup - File coverage_file - File coverage_file_idx - File fam_file - File median_file - String sv_pipeline_docker - } - call GetPotential{ - input: - name=name, - lookup=lookup, - metrics=metrics, - rare_cutoff=rare_cutoff, - cutoffs=cutoffs, - depth_vcf=depth_vcf, - sv_pipeline_docker=sv_pipeline_docker - } - call RdTest{ - input: - bed=GetPotential.rare, - coverage_file=coverage_file, - coverage_file_idx=coverage_file_idx, - median_file=median_file, - fam_file=fam_file, - prefix=name, - sv_pipeline_docker=sv_pipeline_docker - } - output{ - File rare_potential=GetPotential.rare - File common_potential=GetPotential.common - File igvplots=RdTest.plots - File stats=RdTest.stats - } -} - -task GetPotential{ - input{ - String name - Int rare_cutoff - File metrics - File cutoffs - File lookup - File depth_vcf - String sv_pipeline_docker - RuntimeAttr? 
runtime_attr_override - } - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 8, - disk_gb: 100, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - command<<< - set -euox pipefail - cut -f 1,2,3,4,36,37 ~{metrics} > phase3-1_7.rd.metrics - awk '{if ($1~"depth") print}' phase3-1_7.rd.metrics > phase3-1_7.depth.metrics - # Get the del/dup median separation cutoff from the cutoffs file - delmed=$(fgrep Depth ~{cutoffs}|fgrep Median|fgrep DEL |cut -f 2) - dupmed=$(fgrep Depth ~{cutoffs}|fgrep Median|fgrep DUP |cut -f 2) - delp=$(fgrep Depth ~{cutoffs}|fgrep RD_log_pval|fgrep DEL |cut -f 2) - dupp=$(fgrep Depth ~{cutoffs}|fgrep RD_log_pval|fgrep DUP |cut -f 2) - # Find variants that pass p value but not separation - awk -v delp="$delp" -v delmed="$delmed" '{if ($3=="DEL" && $5delp) print}' phase3-1_7.depth.metrics > del.potentialmosaic.txt - awk -v dupp="$dupp" -v dupmed="$dupmed" '{if ($3=="DUP" && $5dupp) print}' phase3-1_7.depth.metrics> dup.potentialmosaic.txt - cat del.potentialmosaic.txt dup.potentialmosaic.txt |cut -f1 > potentialmosaic.txt - tabix -f ~{depth_vcf} - tabix -H ~{depth_vcf} > head.txt - zcat ~{depth_vcf} |fgrep -w -f potentialmosaic.txt >body.txt - cat head.txt body.txt |bgzip -c > test.vcf.gz - bash /opt/sv-pipeline/04_variant_resolution/scripts/stitch_fragmented_calls.sh -x 1 test.vcf.gz test1.vcf.gz - bash /opt/sv-pipeline/04_variant_resolution/scripts/stitch_fragmented_calls.sh -x 1 test1.vcf.gz test2.vcf.gz - bash /opt/sv-pipeline/04_variant_resolution/scripts/stitch_fragmented_calls.sh -x 1 test2.vcf.gz test3.vcf.gz - svtk vcf2bed test3.vcf.gz ~{name}.potentialmosaic.bed - while read chr start end id type sample;do - n=$(zfgrep "$id:" ~{lookup}|cut -f 8)||true - if [ "$n" -eq "$n" ] ;then - if [ "$n" -lt ~{rare_cutoff} ]; then - printf "$chr\t$start\t$end\t$id\t$type\t$sample\n" - fi - fi - done<~{name}.potentialmosaic.bed > ~{name}.potentialmosaic.rare.bed - - echo -e "#chr\tstart\tend\tid\ttype\tsample" > header.bed - cat header.bed ~{name}.potentialmosaic.bed | bgzip > ~{name}.potentialmosaic.bed.gz - cat header.bed ~{name}.potentialmosaic.rare.bed | bgzip > ~{name}.potentialmosaic.rare.bed.gz - >>> - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } - output{ - File common="~{name}.potentialmosaic.bed.gz" - File rare = "~{name}.potentialmosaic.rare.bed.gz" - } -} -# Run rdtest plot -task RdTest { - input{ - File bed - String coverage_file - File coverage_file_idx - File median_file - File fam_file - String prefix - String sv_pipeline_docker - RuntimeAttr? 
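# The awk filters above appear garbled in this patch text: their comparison
# operators are missing ("$5delp", "$5dupp"). Per the comment, the intent is to
# keep calls that pass the RD p-value cutoff but fail the median-separation
# cutoff. A hedged reconstruction of the DEL branch, assuming that after
# "cut -f 1,2,3,4,36,37" column 5 holds the median separation and column 6 the
# RD log p-value (the column roles are an assumption, not confirmed here):
delmed=0.4; delp=5.0   # stand-ins for the values parsed from ~{cutoffs}
awk -v delp="$delp" -v delmed="$delmed" \
    '{if ($3=="DEL" && $5 < delmed && $6 > delp) print}' \
    phase3-1_7.depth.metrics > del.potentialmosaic.txt
# The [ "$n" -eq "$n" ] test in the rarity loop above is just a guard that the
# carrier count pulled from the lookup is a non-empty integer before it is
# compared against rare_cutoff.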
runtime_attr_override - } - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 3.75, - disk_gb: 10, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - command <<< - set -euo pipefail - - zcat ~{bed} | tail -n+2 > rdtest.bed - /opt/RdTest/localize_bincov.sh rdtest.bed ~{coverage_file} - awk -v OFS="\t" '{print $1,$2,$3,$4,$6,$5}' rdtest.bed > test.bed - - Rscript /opt/RdTest/RdTest.R \ - -b test.bed \ - -n ~{prefix} \ - -c local_coverage.bed.gz \ - -m ~{median_file} \ - -f ~{fam_file} \ - -p TRUE - mkdir plots - mv *jpg plots - tar -czvf mosaic.tar.gz plots/ - >>> - - output { - File stats = "~{prefix}.metrics" - File plots= "mosaic.tar.gz" - } - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} diff --git a/wdl/MosaicPesrPart1.wdl b/wdl/MosaicPesrPart1.wdl deleted file mode 100644 index 25dd373e6..000000000 --- a/wdl/MosaicPesrPart1.wdl +++ /dev/null @@ -1,135 +0,0 @@ -version 1.0 - -import "Structs.wdl" - -workflow Mosaic{ - input{ - String name - Array[File] pesr_vcfs - File metrics - File cutoffs - File coverage_file - File coverage_file_idx - File fam_file - File median_file - String sv_pipeline_docker - } - call MergePesrVcfs { - input: - pesr_vcfs=pesr_vcfs, - batch=name, - sv_pipeline_docker=sv_pipeline_docker - } - call GetPotential{ - input: - name=name, - metrics=metrics, - cutoffs=cutoffs, - depth_vcf=MergePesrVcfs.merged_pesr_vcf, - sv_pipeline_docker=sv_pipeline_docker - } - output{ - File merged_pesr=MergePesrVcfs.merged_pesr_vcf - File common_potential=GetPotential.common - } - -} -task MergePesrVcfs { - input{ - Array[File] pesr_vcfs - String batch - String sv_pipeline_docker - RuntimeAttr? runtime_attr_override - } - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 8, - disk_gb: 100, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - command <<< - set -euo pipefail - for VCF in ~{sep=" " pesr_vcfs}; do - bcftools view --min-ac 1 $VCF |bgzip -c > temp.vcf.gz - mv temp.vcf.gz $VCF - done - - vcf-concat ~{sep=" " pesr_vcfs} \ - | vcf-sort -c \ - | bgzip -c > \ - ~{batch}.filtered_pesr_merged.vcf.gz - >>> - - output { - File merged_pesr_vcf = "~{batch}.filtered_pesr_merged.vcf.gz" - } - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - -task GetPotential{ - input{ - String name - File metrics - File cutoffs - File depth_vcf - String sv_pipeline_docker - RuntimeAttr? 
runtime_attr_override - } - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 8, - disk_gb: 100, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - command<<< - set -euo pipefail - cut -f 1,2,3,4,36,37,9,15,27,4 ~{metrics} > ~{name}.rd.metrics - awk '{if ($1!~"depth" && $4>5000) print}' ~{name}.rd.metrics|egrep "DUP|DEL" > ~{name}.depth.metrics - delmed=$(fgrep PESR ~{cutoffs}|fgrep Median|awk '{if($3=="RD_Median_Separation") print $2}') - dupmed=$delmed - delp=$(fgrep PESR ~{cutoffs}|fgrep RD_log_pval|awk '{if($3=="RD_log_pval") print $2}') - dupp=$delp - pe_p=$(fgrep PE_log_pval ~{cutoffs}|cut -f 2) - sr_p=$(fgrep SR_sum_log_pval ~{cutoffs}|cut -f 2) - pesr_p=$(fgrep PESR_log_pval ~{cutoffs}|cut -f 2) - awk -v delp="$delp" -v delmed="$delmed" -v pe_p="$pe_p" -v sr_p="$sr_p" -v pesr_p="$pesr_p" '{if ($3=="DEL" && $8delp ) print}' ~{name}.depth.metrics > del.potentialmosaic.txt - awk -v dupp="$dupp" -v dupmed="$dupmed" -v pe_p="$pe_p" -v sr_p="$sr_p" -v pesr_p="$pesr_p" '{if ($3=="DUP" && $8dupp ) print}' ~{name}.depth.metrics> dup.potentialmosaic.txt - cat del.potentialmosaic.txt dup.potentialmosaic.txt |cut -f1 > potentialmosaic.txt - tabix -f ~{depth_vcf} - tabix -H ~{depth_vcf} > head.txt - zcat ~{depth_vcf} |fgrep -w -f potentialmosaic.txt >body.txt - cat head.txt body.txt |bgzip -c > test.vcf.gz - bash /opt/sv-pipeline/04_variant_resolution/scripts/stitch_fragmented_calls.sh -x 1 test.vcf.gz test1.vcf.gz - bash /opt/sv-pipeline/04_variant_resolution/scripts/stitch_fragmented_calls.sh -x 1 test1.vcf.gz test2.vcf.gz - bash /opt/sv-pipeline/04_variant_resolution/scripts/stitch_fragmented_calls.sh -x 1 test2.vcf.gz test3.vcf.gz - svtk vcf2bed test3.vcf.gz ~{name}.potentialmosaic.bed - - >>> - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } - output{ - File common="~{name}.potentialmosaic.bed" - } -} diff --git a/wdl/MosaicPesrPart2.wdl b/wdl/MosaicPesrPart2.wdl deleted file mode 100644 index 9b3c030b0..000000000 --- a/wdl/MosaicPesrPart2.wdl +++ /dev/null @@ -1,144 +0,0 @@ -version 1.0 - -import "Structs.wdl" - -workflow Mosaic{ - input{ - String name - Int rare_cutoff - File outlier - File lookup - File potential - File coverage_file - File coverage_file_idx - File fam_file - File median_file - String sv_pipeline_docker - } - call GetPotential{ - input: - outlier=outlier, - rare_cutoff=rare_cutoff, - potential=potential, - name=name, - lookup=lookup, - sv_pipeline_docker=sv_pipeline_docker - } - call rdtest{ - input: - bed=GetPotential.rare, - coverage_file=coverage_file, - coverage_file_idx=coverage_file_idx, - median_file=median_file, - fam_file=fam_file, - prefix=name, - sv_pipeline_docker=sv_pipeline_docker - } - output{ - File potentialmosaic = GetPotential.rare - File igvplots = rdtest.plots - } -} - -task GetPotential{ - input{ - String name - Int rare_cutoff - File outlier - File potential - File lookup - String sv_pipeline_docker - RuntimeAttr? 
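# As in the depth task, the comparison operators in the two awk filters above
# appear to have been dropped from this patch text. Separately, note that
# stitch_fragmented_calls.sh is run three times back to back, presumably because
# one stitching pass can create adjacencies that only merge on a later pass. A
# sketch of the same idea run to a fixed point instead of a hard-coded three
# rounds (the script path and -x flag are copied from the calls above; the
# convergence loop itself is an assumption, not the deleted code):
cp test.vcf.gz stitched.vcf.gz
prev=-1
while :; do
  bash /opt/sv-pipeline/04_variant_resolution/scripts/stitch_fragmented_calls.sh \
      -x 1 stitched.vcf.gz next.vcf.gz
  cur=$(zcat next.vcf.gz | grep -vc '^#')
  mv next.vcf.gz stitched.vcf.gz
  [ "$cur" -eq "$prev" ] && break   # stop once the record count stops changing
  prev=$cur
done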
runtime_attr_override - } - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 8, - disk_gb: 100, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - command<<< - set -euox pipefail - fgrep -v -f ~{outlier} ~{potential} >potential.txt - while read chr start end id type sample;do - n=$(zfgrep "$id:" ~{lookup}|cut -f 5) ||true - if [ "$n" -eq "$n" ] ;then - if [ "$n" -lt ~{rare_cutoff} ]; then - printf "$chr\t$start\t$end\t$id\t$type\t$sample\n" - fi - fi - done ~{name}.potentialmosaic.rare.bed - - echo -e "#chr\tstart\tend\tid\ttype\tsample" > header.bed - cat header.bed ~{name}.potentialmosaic.rare.bed | bgzip > ~{name}.potentialmosaic.rare.bed.gz - >>> - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } - output{ - File rare="~{name}.potentialmosaic.rare.bed.gz" - } -} -# Run rdtest -task rdtest { - input{ - File bed - String coverage_file - File coverage_file_idx - File median_file - File fam_file - String prefix - String sv_pipeline_docker - RuntimeAttr? runtime_attr_override - } - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 3.75, - disk_gb: 10, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - command <<< - set -euo pipefail - - zcat ~{bed} | tail -n+2 > rdtest.bed - /opt/RdTest/localize_bincov.sh rdtest.bed ~{coverage_file} - awk -v OFS="\t" '{print $1,$2,$3,$4,$6,$5}' rdtest.bed > test.bed - - Rscript /opt/RdTest/RdTest.R \ - -b test.bed \ - -n ~{prefix} \ - -c local_coverage.bed.gz \ - -m ~{median_file} \ - -f ~{fam_file} \ - -p TRUE - mkdir plots - mv *jpg plots - tar -czvf mosaic.tar.gz plots/ - >>> - - output { - File stats = "~{prefix}.metrics" - File local_coverage = "local_coverage.bed.gz" - File plots= "mosaic.tar.gz" - } - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} diff --git a/wdl/PatchSRBothsidePass.wdl b/wdl/PatchSRBothsidePass.wdl deleted file mode 100644 index 7b6ad66ec..000000000 --- a/wdl/PatchSRBothsidePass.wdl +++ /dev/null @@ -1,133 +0,0 @@ -version 1.0 - -import "Utils.wdl" as utils -import "Structs.wdl" - -workflow PatchSRBothsidePass { - input { - Array[File] batch_sample_lists - File cohort_vcf - File updated_bothside_pass_list - String cohort_name - String contig - - File patch_script - - String sv_base_mini_docker - String sv_pipeline_docker - - RuntimeAttr? runtime_attr_get_non_ref_vids - RuntimeAttr? 
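# The while-read loop above has lost its redirections in this patch text
# ("done ~{name}.potentialmosaic.rare.bed"); judging from the matching loop in
# the depth task, it presumably read from potential.txt and wrote the rare
# subset, i.e. something like "done < potential.txt > NAME.potentialmosaic.rare.bed".
# A self-contained sketch of that filter with illustrative file names; the
# lookup format (variant IDs joined with ':' and the carrier count in column 5,
# per the "cut -f 5" above) is taken from this task:
rare_cutoff=2
while read -r chr start end id type sample; do
  n=$(zfgrep "$id:" pesr.lookup.gz | cut -f5) || true
  if [ "$n" -eq "$n" ] 2>/dev/null && [ "$n" -lt "$rare_cutoff" ]; then
    printf '%s\t%s\t%s\t%s\t%s\t%s\n' "$chr" "$start" "$end" "$id" "$type" "$sample"
  fi
done < potential.txt > sample_set.potentialmosaic.rare.bed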
runtime_attr_calculate_support_frac - } - - scatter (i in range(length(batch_sample_lists))) { - call GetNonRefVariantLists { - input: - samples_list=batch_sample_lists[i], - cohort_vcf=cohort_vcf, - prefix="~{cohort_name}.~{contig}.non_ref_variants.shard_~{i}", - sv_base_mini_docker=sv_base_mini_docker, - runtime_attr_override=runtime_attr_get_non_ref_vids - } - } - - call RecalculateBothsideSupportFractions { - input: - patch_script=patch_script, - non_ref_vid_lists=GetNonRefVariantLists.out, - updated_bothside_pass_list=updated_bothside_pass_list, - num_batches=length(batch_sample_lists), - prefix="~{cohort_name}.~{contig}.sr_bothside_support.patched", - sv_pipeline_docker=sv_pipeline_docker, - runtime_attr_override=runtime_attr_calculate_support_frac - } - - output { - File out = RecalculateBothsideSupportFractions.out - } -} - -task GetNonRefVariantLists { - input { - File samples_list - File cohort_vcf - String prefix - String sv_base_mini_docker - RuntimeAttr? runtime_attr_override - } - - Float input_size = size(cohort_vcf, "GB") - RuntimeAttr runtime_default = object { - mem_gb: 3.75, - disk_gb: ceil(10.0 + input_size), - cpu_cores: 1, - preemptible_tries: 3, - max_retries: 1, - boot_disk_gb: 10 - } - RuntimeAttr runtime_override = select_first([runtime_attr_override, runtime_default]) - runtime { - memory: "~{select_first([runtime_override.mem_gb, runtime_default.mem_gb])} GB" - disks: "local-disk ~{select_first([runtime_override.disk_gb, runtime_default.disk_gb])} HDD" - cpu: select_first([runtime_override.cpu_cores, runtime_default.cpu_cores]) - preemptible: select_first([runtime_override.preemptible_tries, runtime_default.preemptible_tries]) - maxRetries: select_first([runtime_override.max_retries, runtime_default.max_retries]) - docker: sv_base_mini_docker - bootDiskSizeGb: select_first([runtime_override.boot_disk_gb, runtime_default.boot_disk_gb]) - } - - command <<< - set -euo pipefail - bcftools view --samples-file ~{samples_list} ~{cohort_vcf} \ - | bcftools view -G -i 'SUM(AC)>0||SUM(FORMAT/SR_GT)>0' \ - | bcftools query -f '%ID\n' \ - > ~{prefix}.list - >>> - output { - File out = "~{prefix}.list" - } -} - -task RecalculateBothsideSupportFractions { - input { - File patch_script - Array[File] non_ref_vid_lists - File updated_bothside_pass_list - Int num_batches - String prefix - String sv_pipeline_docker - RuntimeAttr? 
runtime_attr_override - } - - Float input_size = size(non_ref_vid_lists, "GB") + size(updated_bothside_pass_list, "GB") - RuntimeAttr runtime_default = object { - mem_gb: 3.75, - disk_gb: ceil(10.0 + input_size * 2.0), - cpu_cores: 1, - preemptible_tries: 3, - max_retries: 1, - boot_disk_gb: 10 - } - RuntimeAttr runtime_override = select_first([runtime_attr_override, runtime_default]) - runtime { - memory: "~{select_first([runtime_override.mem_gb, runtime_default.mem_gb])} GB" - disks: "local-disk ~{select_first([runtime_override.disk_gb, runtime_default.disk_gb])} HDD" - cpu: select_first([runtime_override.cpu_cores, runtime_default.cpu_cores]) - preemptible: select_first([runtime_override.preemptible_tries, runtime_default.preemptible_tries]) - maxRetries: select_first([runtime_override.max_retries, runtime_default.max_retries]) - docker: sv_pipeline_docker - bootDiskSizeGb: select_first([runtime_override.boot_disk_gb, runtime_default.boot_disk_gb]) - } - - command <<< - set -euo pipefail - python ~{patch_script} \ - ~{write_lines(non_ref_vid_lists)} \ - ~{updated_bothside_pass_list} \ - ~{num_batches} \ - > ~{prefix}.txt - >>> - output { - File out = "~{prefix}.txt" - } -} \ No newline at end of file diff --git a/wdl/PatchSRBothsidePassScatter.wdl b/wdl/PatchSRBothsidePassScatter.wdl deleted file mode 100644 index ae4d77e6b..000000000 --- a/wdl/PatchSRBothsidePassScatter.wdl +++ /dev/null @@ -1,54 +0,0 @@ -version 1.0 - -import "Utils.wdl" as utils -import "PatchSRBothsidePass.wdl" as patch -import "Structs.wdl" - -workflow PatchSRBothsidePassScatter { - input { - Array[File] batch_vcfs - Array[File] cohort_contig_vcfs - Array[File] updated_bothside_pass_lists - String cohort_name - File contig_list - - File patch_script - - String sv_base_mini_docker - String sv_pipeline_docker - - RuntimeAttr? runtime_attr_get_sample_ids - RuntimeAttr? runtime_attr_get_non_ref_vids - RuntimeAttr? 
runtime_attr_calculate_support_frac - } - - scatter (i in range(length(batch_vcfs))) { - call utils.GetSampleIdsFromVcf { - input: - vcf=batch_vcfs[i], - sv_base_mini_docker=sv_base_mini_docker, - runtime_attr_override=runtime_attr_get_sample_ids - } - } - - Array[String] contigs = transpose(read_tsv(contig_list))[0] - scatter ( i in range(length(contigs)) ) { - call patch.PatchSRBothsidePass { - input: - batch_sample_lists=GetSampleIdsFromVcf.out_file, - cohort_vcf=cohort_contig_vcfs[i], - updated_bothside_pass_list=updated_bothside_pass_lists[i], - cohort_name=cohort_name, - contig=contigs[i], - patch_script=patch_script, - sv_base_mini_docker=sv_base_mini_docker, - sv_pipeline_docker=sv_pipeline_docker, - runtime_attr_get_non_ref_vids=runtime_attr_get_non_ref_vids, - runtime_attr_calculate_support_frac=runtime_attr_calculate_support_frac - } - } - - output { - Array[File] out = PatchSRBothsidePass.out - } -} diff --git a/wdl/PreRFCohort.wdl b/wdl/PreRFCohort.wdl deleted file mode 100644 index 0163e10c1..000000000 --- a/wdl/PreRFCohort.wdl +++ /dev/null @@ -1,186 +0,0 @@ -version 1.0 - -import "Structs.wdl" - -workflow make_cohort_VCFs { - input{ - Array[File] pesr_vcfs - Array[File] depth_vcfs - String sv_pipeline_docker - } - call MergePESRVcfs { - input: - vcfs_list=write_lines(pesr_vcfs), - prefix="pesr", - sv_pipeline_docker=sv_pipeline_docker - } - call MergeDepthVcfs{ - input: - vcfs=depth_vcfs, - prefix="depth", - sv_pipeline_docker=sv_pipeline_docker - } - - output { - File pesrlookup = MergePESRVcfs.lookup - File depthlookup = MergeDepthVcfs.lookup - } -} - - -task MergePESRVcfs { - input{ - File vcfs_list - String prefix - String sv_pipeline_docker - RuntimeAttr? runtime_attr_override - } - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 64, - disk_gb: 200, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - command <<< - set -euo pipefail - /opt/sv-pipeline/04_variant_resolution/scripts/merge_vcfs.sh ~{vcfs_list} ~{prefix} - while read vcf; do - svtk vcf2bed --no-header $vcf $vcf.bed - awk '{if($3-$2>5000) print $0}' $vcf.bed > test.bed;mv test.bed $vcf.bed - done < vcfs.list - cat *.bed |sort -k1,1V -k2,2n -k3,3n |bgzip -c > cohort.sort.bed.gz - #clustering - svtk vcf2bed ~{prefix}.vcf.gz ~{prefix}.vcf.gz.bed - awk '{if($3-$2>5000) print $0}' ~{prefix}.vcf.gz.bed >test.bed ; mv test.bed ~{prefix}.vcf.gz.bed - fgrep DEL ~{prefix}.vcf.gz.bed> del.bed - fgrep DUP ~{prefix}.vcf.gz.bed> dup.bed - svtk bedcluster del.bed |cut -f1-7 |awk '{print $0","}' > del.cluster.bed - svtk bedcluster dup.bed |cut -f1-7 |awk '{print $0","}' > dup.cluster.bed - cat del.cluster.bed dup.cluster.bed |sort -k1,1V -k2,2n -k3,3n |fgrep -v "#"> cluster.bed - #harrison's - zcat cohort.sort.bed.gz | awk '{a[$1"@"$2"@"$3]=a[$1"@"$2"@"$3]?a[$1"@"$2"@"$3]":"$4:$4;b[$1"@"$2"@"$3]=b[$1"@"$2"@"$3]?b[$1"@"$2"@"$3]","$6:$6;}END{for (i in a)print i "\t" a[i] "\t" b[i];}'|tr '@' '\t'|bgzip>pesr.combined.gz - zcat pesr.combined.gz |awk -F "," ' { print $0"\t"NF } ' |awk -v OFS="\t" '{print $1,$2,$3,$4":",$6}' |bgzip -c > pesr.lookup.gz - >>> - output { - File cohort_sort = "cohort.sort.bed.gz" - File cluster="cluster.bed" - File lookup="pesr.lookup.gz" - } - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - 
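# The two awk one-liners above ("harrison's") build the PESR carrier lookup: the
# first groups every record sharing the same chrom/start/end, joining the
# variant IDs with ':' and the carrier samples with ','; the second, run with
# -F",", uses NF to count the comma-separated carriers and emits chrom, start,
# end, the ':'-terminated ID list, and the count. A tiny self-contained demo of
# the same transformation on toy input (values are illustrative):
printf 'chr1\t100\t500\tvar_a\tDEL\tsampleA\nchr1\t100\t500\tvar_b\tDEL\tsampleB\n' > toy.bed
awk '{k=$1"@"$2"@"$3;
      a[k]=a[k]?a[k]":"$4:$4;
      b[k]=b[k]?b[k]","$6:$6}
     END{for (i in a) print i "\t" a[i] "\t" b[i]}' toy.bed \
  | tr '@' '\t' \
  | awk -F',' '{print $0"\t"NF}' \
  | awk -v OFS='\t' '{print $1,$2,$3,$4":",$6}'
# expected: chr1  100  500  var_a:var_b:  2
# column 5 (the carrier count) is what the mosaic tasks later read with
# zfgrep "$id:" pesr.lookup.gz | cut -f 5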
bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - -task MergeDepthVcfs { - input { - Array[File] vcfs - String prefix - String sv_pipeline_docker - RuntimeAttr? runtime_attr_override - } - - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 16, - disk_gb: 100, - boot_disk_gb: 10, - preemptible_tries: 0, - max_retries: 1 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - command <<< - set -euo pipefail - /opt/sv-pipeline/04_variant_resolution/scripts/merge_vcfs.py ~{write_lines(vcfs)} ~{prefix}.vcf - vcf-sort -c ~{prefix}.vcf | bgzip -c > ~{prefix}.vcf.gz - while read vcf; do - local_vcf=$(basename $vcf) - svtk vcf2bed --no-header $vcf $local_vcf.bed # for each depth vcf make bed, duplicated - done < ~{write_lines(vcfs)} - cat *.bed |sort -k1,1V -k2,2n -k3,3n> cohort.sort.bed # concat raw depth vcf, duplicated - svtk vcf2bed ~{prefix}.vcf.gz ~{prefix}.vcf.gz.bed # vcf2bed merge_vcfs, non_duplicated - fgrep DEL ~{prefix}.vcf.gz.bed> del.bed # del non duplicated - fgrep DUP ~{prefix}.vcf.gz.bed> dup.bed # dup non duplicated - svtk bedcluster del.bed |cut -f1-7 |awk '{print $0","}' > del.cluster.bed #cluster non_duplicated del - svtk bedcluster dup.bed |cut -f1-7 |awk '{print $0","}' > dup.cluster.bed #cluster non_duplicated dup - cat del.cluster.bed dup.cluster.bed |sort -k1,1V -k2,2n -k3,3n |fgrep -v "#"> cluster.bed #combine clusterd non-duplicated - python3 <>> - output { - File lookup = "master_cluster_dups.bed.gz" - } - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} diff --git a/wdl/RdPeSrAnno.wdl b/wdl/RdPeSrAnno.wdl deleted file mode 100644 index b81d55f49..000000000 --- a/wdl/RdPeSrAnno.wdl +++ /dev/null @@ -1,184 +0,0 @@ -version 1.0 - -import "Structs.wdl" -import "TasksMakeCohortVcf.wdl" as MiniTasks -import "TasksBenchmark.wdl" as tasks10 - -workflow RdPeSrAnno { - input { - String prefix - String bam_or_cram_file - String bam_or_cram_index - File vcf_file - File ref_fasta - File ref_fai - File ref_dict - File contig_list - File pe_metrics - File sr_metrics - File rd_metrics - String pesrrd_annotation_docker - String sv_base_mini_docker - String sv_pipeline_docker - RuntimeAttr? runtime_attr_rdpesr - RuntimeAttr? runtime_attr_bcf2vcf - RuntimeAttr? runtime_attr_LocalizeCram - RuntimeAttr? runtime_attr_vcf2bed - RuntimeAttr? runtime_attr_SplitVcf - RuntimeAttr? 
runtime_attr_ConcatBeds - } - - Array[String] contigs = transpose(read_tsv(contig_list))[0] - scatter ( contig in contigs ) { - - call tasks10.LocalizeCram as LocalizeCram{ - input: - contig = contig, - ref_fasta=ref_fasta, - ref_fai=ref_fai, - ref_dict=ref_dict, - bam_or_cram_file=bam_or_cram_file, - bam_or_cram_index=bam_or_cram_index, - sv_pipeline_docker=sv_pipeline_docker, - runtime_attr_override=runtime_attr_LocalizeCram - } - - call tasks10.SplitVcf as SplitVcf{ - input: - contig = contig, - vcf_file = vcf_file, - sv_pipeline_docker=sv_pipeline_docker, - runtime_attr_override=runtime_attr_SplitVcf - } - - call tasks10.vcf2bed as vcf2bed{ - input: - vcf = SplitVcf.contig_vcf, - vcf_index = SplitVcf.contig_vcf_index, - sv_pipeline_docker = sv_pipeline_docker, - runtime_attr_override = runtime_attr_vcf2bed - } - - call RunRdPeSrAnnotation{ - input: - prefix = prefix, - contig = contig, - bam_or_cram_file=LocalizeCram.local_bam, - bam_or_cram_index=LocalizeCram.local_bai, - bed = vcf2bed.bed, - pe_metrics = pe_metrics, - sr_metrics = sr_metrics, - rd_metrics = rd_metrics, - ref_fasta = ref_fasta, - ref_fai = ref_fai, - ref_dict=ref_dict, - pesrrd_annotation_docker = pesrrd_annotation_docker, - runtime_attr_override = runtime_attr_rdpesr - } - } - - call MiniTasks.ConcatBeds as ConcatPesrAnno{ - input: - shard_bed_files=RunRdPeSrAnnotation.pesr_anno, - prefix=prefix, - sv_base_mini_docker=sv_base_mini_docker, - runtime_attr_override=runtime_attr_ConcatBeds - } - - call MiniTasks.ConcatBeds as ConcatRdAnno{ - input: - shard_bed_files=RunRdPeSrAnnotation.cov, - prefix=prefix, - sv_base_mini_docker=sv_base_mini_docker, - runtime_attr_override=runtime_attr_ConcatBeds - } - - call MiniTasks.ConcatBeds as ConcatRdAnnoLeFlank{ - input: - shard_bed_files=RunRdPeSrAnnotation.cov_le_flank, - prefix=prefix, - sv_base_mini_docker=sv_base_mini_docker, - runtime_attr_override=runtime_attr_ConcatBeds - } - - call MiniTasks.ConcatBeds as ConcatRdAnnoRiFlank{ - input: - shard_bed_files=RunRdPeSrAnnotation.cov_ri_flank, - prefix=prefix, - sv_base_mini_docker=sv_base_mini_docker, - runtime_attr_override=runtime_attr_ConcatBeds - } - - - output{ - File PesrAnno = ConcatPesrAnno.merged_bed_file - File RdAnno = ConcatRdAnno.merged_bed_file - File RdAnnoLeFlank = ConcatRdAnnoLeFlank.merged_bed_file - File RdAnnoRiFlank = ConcatRdAnnoRiFlank.merged_bed_file - } - } - - -task RunRdPeSrAnnotation{ - input{ - String prefix - String contig - File bam_or_cram_file - File bam_or_cram_index - File bed - File pe_metrics - File sr_metrics - File rd_metrics - File ref_fasta - File ref_fai - File ref_dict - String pesrrd_annotation_docker - RuntimeAttr? 
runtime_attr_override - } - - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 3.75, - disk_gb: 5, - boot_disk_gb: 10, - preemptible_tries: 0, - max_retries: 1 - } - - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - String filename = basename(bed, '.bed') - - output { - File pesr_anno = "~{bed}.INS_with_SR_PE" - File cov = "~{bed}.Seq_Cov" - File cov_ri_flank = "~{bed}.ri_flank.Seq_Cov" - File cov_le_flank = "~{bed}.le_flank.Seq_Cov" - } - - command <<< - - set -Eeuo pipefail - Rscript /src/modify_bed_for_PE_SR_RD_labeling.R -i ~{bed} - - python3 /src/add_SR_PE_to_PB_INS.V2.py ~{bed} ~{pe_metrics} ~{sr_metrics} - - zcat ~{rd_metrics} | grep -v '@' | grep -v CONTIG |bgzip > bincov.tsv.gz - Rscript /src/bincov_to_normCov.R -i bincov.tsv.gz - bgzip normCov.tsv - tabix normCov.tsv.gz - - python3 /src/add_RD_to_SVs.py ~{bed} normCov.tsv.gz - python3 /src/add_RD_to_SVs.py ~{filename}.ri_flank normCov.tsv.gz - python3 /src/add_RD_to_SVs.py ~{filename}.le_flank normCov.tsv.gz - - >>> - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: pesrrd_annotation_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} diff --git a/wdl/RdTestVisualization.wdl b/wdl/RdTestVisualization.wdl deleted file mode 100755 index a57f899c4..000000000 --- a/wdl/RdTestVisualization.wdl +++ /dev/null @@ -1,113 +0,0 @@ -version 1.0 - -import "Structs.wdl" - -workflow RdTestVisualization{ - input{ - String prefix - Array[File] medianfile - File pedfile - File sample_batches - File batch_bincov - File bed - String flags - String sv_pipeline_docker - RuntimeAttr? runtime_attr_rdtest - } - call rdtest{ - input: - bed=bed, - medianfile=medianfile, - pedfile=pedfile, - sample_batches=sample_batches, - batch_bincov=batch_bincov, - prefix=prefix, - flags=flags, - sv_pipeline_docker=sv_pipeline_docker, - runtime_attr_override = runtime_attr_rdtest - } - output{ - File Plots = rdtest.plots - } -} - - -# Run rdtest -task rdtest { - input{ - File bed - File sample_batches # samples, batches - File batch_bincov # batch, bincov - Array[File] medianfile - File pedfile - String prefix - String sv_pipeline_docker - String flags - RuntimeAttr? 
runtime_attr_override - } - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 7.5, - disk_gb: 10, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - command <<< - set -ex - awk -v OFS="\t" '{print $1,$2,$3,$4,$6,$5}' ~{bed} |egrep "DEL|DUP" | sort -k1,1 -k2,2n> test.bed - cut -f5 test.bed |sed 's/\,/\n/g'|sort -u > samples.txt - fgrep -wf samples.txt ~{sample_batches} |awk '{print $2}' |sort -u >existing_batches.txt - fgrep -f existing_batches.txt ~{batch_bincov} > bincovlist.txt - paste ~{sep=" " medianfile} > medianfile.txt - - i=0 - bedtools merge -i test.bed > test.merged.bed - while read batch bincov - do - let "i=$i+1" - if [ $i -gt 1 ] - then - export GCS_OAUTH_TOKEN=`gcloud auth application-default print-access-token` - tabix -h $bincov -R test.merged.bed|cut -f4->covfile.$i.bed - else - export GCS_OAUTH_TOKEN=`gcloud auth application-default print-access-token` - tabix -h $bincov -R test.merged.bed>covfile.$i.bed - - fi - doneallcovfile.bed.gz - tabix allcovfile.bed.gz - rm covfile.*.bed - zcat allcovfile.bed.gz |head -n 1|cut -f 4-|tr '\t' '\n'>samples.txt - Rscript /opt/RdTest/RdTest.R \ - -b test.bed \ - -n ~{prefix} \ - -c allcovfile.bed.gz \ - -m medianfile.txt \ - -f ~{pedfile} \ - -p TRUE \ - -w samples.txt \ - -s 10000000 \ - ~{flags} - mkdir ~{prefix}_rd_plots - mv *jpg ~{prefix}_rd_plots - tar -czvf ~{prefix}_rd_plots.tar.gz ~{prefix}_rd_plots/ - >>> - - output { - File plots = "~{prefix}_rd_plots.tar.gz" - } - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} diff --git a/wdl/RenameVcfSamples.wdl b/wdl/RenameVcfSamples.wdl deleted file mode 100644 index 0d1dd25c9..000000000 --- a/wdl/RenameVcfSamples.wdl +++ /dev/null @@ -1,90 +0,0 @@ -version 1.0 - -import "Structs.wdl" - -workflow RenameVcfSamples { - input { - File vcf - File? vcf_index - Array[String]? current_sample_ids - Array[String]? new_sample_ids - File? sample_id_rename_map # TSV with old sample ID in first column, new sample ID in second column. Either provide this file or both of the string arrays - String prefix - Boolean? check_rename_all_samples # if True, check that all sample IDs in header are in renaming map / current sample IDs list - String sv_pipeline_docker - RuntimeAttr? 
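# The coverage-gathering loop above is garbled in this patch text: the loop's
# input redirection and the paste/bgzip step that should produce
# allcovfile.bed.gz are missing ("doneallcovfile.bed.gz"). A hedged
# reconstruction of the presumed flow: read each batch's bincov path, pull only
# the regions of interest with tabix (keeping the header and coordinate columns
# from the first batch only), then paste the per-batch sample columns side by
# side and index the result:
i=0
while read -r batch bincov; do
  i=$((i+1))
  export GCS_OAUTH_TOKEN=$(gcloud auth application-default print-access-token)
  if [ "$i" -gt 1 ]; then
    tabix -h -R test.merged.bed "$bincov" | cut -f4- > covfile.$i.bed
  else
    tabix -h -R test.merged.bed "$bincov" > covfile.$i.bed
  fi
done < bincovlist.txt
paste covfile.*.bed | bgzip -c > allcovfile.bed.gz
tabix -p bed allcovfile.bed.gz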
runtime_attr_override - } - - call RenameVcfSamplesTask { - input: - vcf=vcf, - vcf_idx=select_first([vcf_index, "~{vcf}.tbi"]), - sample_id_rename_map = if (defined(current_sample_ids) && defined(new_sample_ids)) then - write_tsv(transpose([select_first([current_sample_ids]), select_first([new_sample_ids])])) else - select_first([sample_id_rename_map]), - prefix="~{prefix}.renamed", - check_rename_all_samples = check_rename_all_samples, - sv_pipeline_docker=sv_pipeline_docker, - runtime_attr_override=runtime_attr_override - } - - output { - File vcf_out = RenameVcfSamplesTask.out - File vcf_out_index = RenameVcfSamplesTask.out_index - } -} - -task RenameVcfSamplesTask { - input { - File vcf - File vcf_idx - File sample_id_rename_map - String prefix - Boolean? check_rename_all_samples = true - String sv_pipeline_docker - RuntimeAttr? runtime_attr_override - } - - RuntimeAttr runtime_default = object { - mem_gb: 1.0, - disk_gb: ceil(10 + size(vcf, "GiB") * 2.0), - cpu_cores: 1, - preemptible_tries: 3, - max_retries: 1, - boot_disk_gb: 10 - } - RuntimeAttr runtime_override = select_first([runtime_attr_override, runtime_default]) - runtime { - memory: "~{select_first([runtime_override.mem_gb, runtime_default.mem_gb])} GiB" - disks: "local-disk ~{select_first([runtime_override.disk_gb, runtime_default.disk_gb])} HDD" - cpu: select_first([runtime_override.cpu_cores, runtime_default.cpu_cores]) - preemptible: select_first([runtime_override.preemptible_tries, runtime_default.preemptible_tries]) - maxRetries: select_first([runtime_override.max_retries, runtime_default.max_retries]) - docker: sv_pipeline_docker - bootDiskSizeGb: select_first([runtime_override.boot_disk_gb, runtime_default.boot_disk_gb]) - } - - command <<< - set -euo pipefail - if ~{check_rename_all_samples}; then - bcftools query -l ~{vcf} | sort > samples.list - python <>> - - output { - File out = "~{prefix}.vcf.gz" - File out_index = "~{prefix}.vcf.gz.tbi" - } -} diff --git a/wdl/ReviseSVtypeINStoMEI.wdl b/wdl/ReviseSVtypeINStoMEI.wdl deleted file mode 100644 index 237be9326..000000000 --- a/wdl/ReviseSVtypeINStoMEI.wdl +++ /dev/null @@ -1,60 +0,0 @@ -version 1.0 - -import "Structs.wdl" -import "TasksMakeCohortVcf.wdl" as MiniTasks -import "ReviseSVtypeINStoMEIperContig.wdl" as ReviseSVtypePerContig - -workflow ReviseSVtypeINStoMEI { - input{ - File vcf - File vcf_idx - String prefix - File contiglist - - Int max_shards_per_chrom_step1 - Int min_records_per_shard_step1 - - String sv_base_mini_docker - String sv_pipeline_docker - - RuntimeAttr? runtime_override_split_vcf_to_clean - RuntimeAttr? runtime_attr_ReviseSVtypeMEI - RuntimeAttr? 
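# The Python here-document that performed the sample-ID check and rename in this
# task is not recoverable from this patch text ("python <>>"). A minimal sketch
# of the same contract using only bcftools, offered as a plausible equivalent
# rather than the deleted implementation: verify that every sample in the VCF
# header appears in the two-column (old, new) rename map, then rewrite the
# header and re-index; file names are illustrative:
bcftools query -l in.vcf.gz | sort > samples.list
cut -f1 sample_id_rename_map.tsv | sort > map_old_ids.list
if [ -n "$(comm -23 samples.list map_old_ids.list)" ]; then
  echo "ERROR: VCF contains samples missing from the rename map" >&2
  exit 1
fi
bcftools reheader -s sample_id_rename_map.tsv -o renamed.vcf.gz in.vcf.gz
tabix -p vcf renamed.vcf.gz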
runtime_override_combine_step_1_vcfs - } - - Array[String] contigs = transpose(read_tsv(contiglist))[0] - scatter ( contig in contigs ) { - call ReviseSVtypePerContig.ReviseSVtypeINStoMEIperContig as ReviseSVtypeINStoMEIperContig{ - input: - vcf = vcf, - vcf_idx = vcf_idx, - prefix = prefix, - contig = contig, - max_shards_per_chrom_step1 = max_shards_per_chrom_step1, - min_records_per_shard_step1 = min_records_per_shard_step1, - sv_base_mini_docker = sv_base_mini_docker, - runtime_override_split_vcf_to_clean = runtime_override_split_vcf_to_clean, - runtime_attr_ReviseSVtypeMEI = runtime_attr_ReviseSVtypeMEI, - runtime_override_combine_step_1_vcfs = runtime_override_combine_step_1_vcfs - } - } - - call MiniTasks.ConcatVcfs as CombineStep2Vcfs { - input: - vcfs = ReviseSVtypeINStoMEIperContig.updated_vcf, - vcfs_idx = ReviseSVtypeINStoMEIperContig.updated_vcf_idx, - naive = true, - outfile_prefix = "~{prefix}.SVtypeRevisedINStoMEI", - sv_base_mini_docker = sv_base_mini_docker, - runtime_attr_override = runtime_override_combine_step_1_vcfs - } - - output{ - File updated_vcf = CombineStep2Vcfs.concat_vcf - File updated_vcf_idx = CombineStep2Vcfs.concat_vcf_idx - } -} - - - - diff --git a/wdl/ReviseSVtypeINStoMEIperContig.wdl b/wdl/ReviseSVtypeINStoMEIperContig.wdl deleted file mode 100644 index 531f404eb..000000000 --- a/wdl/ReviseSVtypeINStoMEIperContig.wdl +++ /dev/null @@ -1,112 +0,0 @@ -version 1.0 - -import "Structs.wdl" -import "TasksMakeCohortVcf.wdl" as MiniTasks - - -workflow ReviseSVtypeINStoMEIperContig { - input{ - File vcf - File vcf_idx - String prefix - String contig - - Int max_shards_per_chrom_step1 - Int min_records_per_shard_step1 - - String sv_base_mini_docker - - RuntimeAttr? runtime_override_split_vcf_to_clean - RuntimeAttr? runtime_attr_ReviseSVtypeMEI - RuntimeAttr? runtime_override_combine_step_1_vcfs - } - - - call MiniTasks.SplitVcf as SplitVcfReviseSVtypeMEI { - input: - vcf=vcf, - contig=contig, - prefix="~{prefix}.~{contig}.shard_", - n_shards=max_shards_per_chrom_step1, - min_vars_per_shard=min_records_per_shard_step1, - sv_base_mini_docker=sv_base_mini_docker, - runtime_attr_override=runtime_override_split_vcf_to_clean - } - - scatter (vcf_shard in SplitVcfReviseSVtypeMEI.vcf_shards) { - call ReviseSVtypeMEI{ - input: - vcf = vcf_shard, - sv_base_mini_docker = sv_base_mini_docker, - prefix = "~{prefix}.~{contig}.SVtypeRevised.shard_", - runtime_attr_override = runtime_attr_ReviseSVtypeMEI - } - } - - call MiniTasks.ConcatVcfs as CombineStep1Vcfs { - input: - vcfs=ReviseSVtypeMEI.updated_vcf, - vcfs_idx=ReviseSVtypeMEI.updated_vcf_idx, - naive=true, - outfile_prefix="~{prefix}.~{contig}.SVtypeRevisedINStoMEI", - sv_base_mini_docker=sv_base_mini_docker, - runtime_attr_override=runtime_override_combine_step_1_vcfs - } - - output{ - File updated_vcf = CombineStep1Vcfs.concat_vcf - File updated_vcf_idx = CombineStep1Vcfs.concat_vcf_idx - } -} - - - -# revise svtype of MEIs to SVTYPE=MEI -task ReviseSVtypeMEI{ - input{ - File vcf - String prefix - String sv_base_mini_docker - RuntimeAttr? 
runtime_attr_override - } - - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 3.75, - disk_gb: 100, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - - command <<< - zcat ~{vcf} | grep '#' > ~{prefix}.vcf - zcat ~{vcf} | grep -v '#' | grep "INS:ME" | sed -e "s/SVTYPE=INS/SVTYPE=MEI/" >> ~{prefix}.vcf - zcat ~{vcf} | grep -v '#' | grep -v "INS:ME" >> ~{prefix}.vcf - mkdir tmp - vcf-sort -t tmp/ ~{prefix}.vcf | bgzip > ~{prefix}.vcf.gz - tabix -p vcf ~{prefix}.vcf.gz - >>> - - output{ - File updated_vcf = "~{prefix}.vcf.gz" - File updated_vcf_idx = "~{prefix}.vcf.gz.tbi" - } - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_base_mini_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - - - - - diff --git a/wdl/SetSampleIdLegacy.wdl b/wdl/SetSampleIdLegacy.wdl deleted file mode 100644 index 17957d3a3..000000000 --- a/wdl/SetSampleIdLegacy.wdl +++ /dev/null @@ -1,143 +0,0 @@ -version 1.0 - -import "Structs.wdl" - -workflow SetSampleIdLegacy { - input { - String sample_name - File? BAF_file - File? BAF_file_index - File? PE_file - File? PE_file_index - File? SR_file - File? SR_file_index - File? SD_file - File? SD_file_index - File reference_dict - String gatk_docker - RuntimeAttr? runtime_attr_override - } - - if (defined(BAF_file)) { - call SetSampleId as SetSampleIdBAF { - input: - evidence_file = select_first([BAF_file]), - evidence_file_index = BAF_file_index, - file_type = "baf", - sample_name = sample_name, - sample_column = 4, - reference_dict = reference_dict, - gatk_docker = gatk_docker, - runtime_attr_override = runtime_attr_override - } - } - - if (defined(PE_file)) { - call SetSampleId as SetSampleIdPE { - input: - evidence_file = select_first([PE_file]), - evidence_file_index = PE_file_index, - file_type = "pe", - sample_name = sample_name, - sample_column = 7, - reference_dict = reference_dict, - gatk_docker = gatk_docker, - runtime_attr_override = runtime_attr_override - } - } - - if (defined(SR_file)) { - call SetSampleId as SetSampleIdSR { - input: - evidence_file = select_first([SR_file]), - evidence_file_index = SR_file_index, - file_type = "sr", - sample_name = sample_name, - sample_column = 5, - reference_dict = reference_dict, - gatk_docker = gatk_docker, - runtime_attr_override = runtime_attr_override - } - } - - if (defined(SD_file)) { - call SetSampleId as SetSampleIdSD { - input: - evidence_file = select_first([SD_file]), - evidence_file_index = SD_file_index, - file_type = "sd", - sample_name = sample_name, - sample_column = 3, - reference_dict = reference_dict, - gatk_docker = gatk_docker, - runtime_attr_override = runtime_attr_override - } - } - - output { - File? BAF_out = SetSampleIdBAF.out - File? BAF_out_index = SetSampleIdBAF.out_index - File? PE_out = SetSampleIdPE.out - File? PE_out_index = SetSampleIdPE.out_index - File? SR_out = SetSampleIdSR.out - File? SR_out_index = SetSampleIdSR.out_index - File? SD_out = SetSampleIdSD.out - File? 
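# The three greps above write the header, then all INS:ME records, then every
# other record, which is why the vcf-sort pass is needed before indexing. An
# order-preserving single-pass alternative (a sketch, not the deleted task's
# code) that applies the same SVTYPE=INS to SVTYPE=MEI rewrite only to records
# containing "INS:ME" and so needs no re-sort:
zcat in.vcf.gz \
  | awk '/^#/ {print; next}
         /INS:ME/ {sub(/SVTYPE=INS/, "SVTYPE=MEI")}
         {print}' \
  | bgzip -c > revised.vcf.gz
tabix -p vcf revised.vcf.gz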
SD_out_index = SetSampleIdSD.out_index - } -} - -task SetSampleId { - input { - File evidence_file - File? evidence_file_index - String file_type - String sample_name - Int sample_column - File reference_dict - String gatk_docker - RuntimeAttr? runtime_attr_override - } - - Int disk_size = 10 + ceil(size(evidence_file, "GiB") * 2) - - RuntimeAttr default_attr = object { - cpu_cores: 2, - mem_gb: 3.75, - disk_gb: disk_size, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - - output { - File out = "~{sample_name}.~{file_type}.txt.gz" - File out_index = "~{sample_name}.~{file_type}.txt.gz.tbi" - } - command <<< - - set -euo pipefail - - fifo_name="~{sample_name}.~{file_type}.txt" - output_name="~{sample_name}.~{file_type}.txt.gz" - - if [ ! -f "~{evidence_file}.tbi" ]; then - tabix -0 -s1 -b2 -e2 ~{evidence_file} - fi - - mkfifo $fifo_name - /gatk/gatk --java-options "-Xmx2000m" PrintSVEvidence -F ~{evidence_file} --sequence-dictionary ~{reference_dict} -O $fifo_name & - awk '{$~{sample_column}="~{sample_name}"}' < $fifo_name | bgzip -c > $output_name - tabix -0 -s1 -b2 -e2 $output_name - - >>> - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: gatk_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} diff --git a/wdl/SubsetVcfBySamples.wdl b/wdl/SubsetVcfBySamples.wdl deleted file mode 100644 index f41ace8a1..000000000 --- a/wdl/SubsetVcfBySamples.wdl +++ /dev/null @@ -1,35 +0,0 @@ -version 1.0 - -import "Structs.wdl" -import "Utils.wdl" as util - -workflow SubsetVcfBySamples { - input { - File vcf - File? vcf_index - File list_of_samples # List of samples to keep (default, remove_samples = false) or remove (remove_samples = true) - String? outfile_name - Boolean? remove_samples # If false (default), keep samples in provided list. If true, remove them. - Boolean? remove_private_sites # If true (default), remove sites that are private to excluded samples. If false, keep sites even if no remaining samples are non-ref. - - String sv_base_mini_docker - RuntimeAttr? runtime_attr_subset_by_samples - } - - call util.SubsetVcfBySamplesList { - input: - vcf = vcf, - vcf_idx = vcf_index, - list_of_samples = list_of_samples, - outfile_name = outfile_name, - remove_samples = remove_samples, - remove_private_sites = remove_private_sites, - sv_base_mini_docker = sv_base_mini_docker, - runtime_attr_override = runtime_attr_subset_by_samples - } - - output { - File vcf_subset = SubsetVcfBySamplesList.vcf_subset - File vcf_subset_index = SubsetVcfBySamplesList.vcf_subset_index - } -} diff --git a/wdl/TasksBenchmark.wdl b/wdl/TasksBenchmark.wdl deleted file mode 100644 index dd7ae8476..000000000 --- a/wdl/TasksBenchmark.wdl +++ /dev/null @@ -1,339 +0,0 @@ -version 1.0 - -import "Structs.wdl" - -# Merge shards after Vapor -task ConcatVapor { - input { - Array[File] shard_bed_files - Array[File] shard_plots - String prefix - Boolean? index_output - String sv_base_mini_docker - RuntimeAttr? 
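# As printed in this patch text, the awk program in the SetSampleId command
# above only assigns the sample column and never prints, and reassigning a field
# would also re-join the record with spaces rather than tabs; the intended form
# presumably set OFS and printed each record. A hedged sketch of that intent on
# a plain evidence file, skipping the PrintSVEvidence/fifo plumbing (the file
# name is illustrative and column 5 stands in for sample_column, as used for SR
# evidence above):
zcat evidence.sr.txt.gz \
  | awk -F'\t' -v OFS='\t' -v col=5 -v name=NEW_SAMPLE '{$col=name; print}' \
  | bgzip -c > NEW_SAMPLE.sr.txt.gz
tabix -0 -s1 -b2 -e2 NEW_SAMPLE.sr.txt.gz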
runtime_attr_override - } - - Boolean call_tabix = select_first([index_output, true]) - String output_file="~{prefix}.bed.gz" - - # when filtering/sorting/etc, memory usage will likely go up (much of the data will have to - # be held in memory or disk while working, potentially in a form that takes up more space) - Float input_size = size(shard_bed_files, "GB") - Float compression_factor = 5.0 - RuntimeAttr runtime_default = object { - mem_gb: 2.0 + compression_factor * input_size, - disk_gb: ceil(10.0 + input_size * (2.0 + compression_factor)), - cpu_cores: 1, - preemptible_tries: 3, - max_retries: 1, - boot_disk_gb: 10 - } - RuntimeAttr runtime_override = select_first([runtime_attr_override, runtime_default]) - runtime { - memory: "~{select_first([runtime_override.mem_gb, runtime_default.mem_gb])} GB" - disks: "local-disk ~{select_first([runtime_override.disk_gb, runtime_default.disk_gb])} HDD" - cpu: select_first([runtime_override.cpu_cores, runtime_default.cpu_cores]) - preemptible: select_first([runtime_override.preemptible_tries, runtime_default.preemptible_tries]) - maxRetries: select_first([runtime_override.max_retries, runtime_default.max_retries]) - docker: sv_base_mini_docker - bootDiskSizeGb: select_first([runtime_override.boot_disk_gb, runtime_default.boot_disk_gb]) - } - - command <<< - set -eu - - zcat ~{shard_bed_files[0]} | head -n1 > header.txt - # note head -n1 stops reading early and sends SIGPIPE to zcat, - # so setting pipefail here would result in early termination - - # no more early stopping - set -o pipefail - - while read SPLIT; do - zcat $SPLIT | tail -n+2 - done < ~{write_lines(shard_bed_files)} \ - | sort -Vk1,1 -k2,2n -k3,3n \ - | cat header.txt - \ - | bgzip -c \ - > ~{output_file} - - if ~{call_tabix}; then - tabix -f -p bed ~{output_file} - else - touch ~{output_file}.tbi - fi - - mkdir ~{prefix}.plots - while read SPLIT; do - tar zxvf $SPLIT -C ~{prefix}.plots/ - done < ~{write_lines(shard_plots)} - - tar -czf ~{prefix}.plots.tar.gz ~{prefix}.plots/ - >>> - - output { - File merged_bed_file = output_file - File merged_bed_plot = "~{prefix}.plots.tar.gz" - } -} - -#localize a specific contig of a bam/cram file -task LocalizeCram { - input { - String contig - File ref_fasta - File ref_fai - File ref_dict - String bam_or_cram_file - String bam_or_cram_index - String sv_pipeline_docker - RuntimeAttr? 
runtime_attr_override - } - - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 15, - disk_gb: 40, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - Float mem_gb = select_first([runtime_attr.mem_gb, default_attr.mem_gb]) - Int java_mem_mb = ceil(mem_gb * 1000 * 0.8) - - output { - File local_bam = "~{contig}.bam" - File local_bai = "~{contig}.bam.bai" - } - - command <<< - set -Eeuo pipefail - - java -Xmx~{java_mem_mb}M -jar ${GATK_JAR} PrintReads \ - -I ~{bam_or_cram_file} \ - -L ~{contig} \ - -O ~{contig}.bam \ - -R ~{ref_fasta} - >>> - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - -task LocalizeCramRequestPay { - input { - String contig - File ref_fasta - File ref_fai - File ref_dict - String project_id - String bam_or_cram_file - String bam_or_cram_index - String sv_pipeline_docker - RuntimeAttr? runtime_attr_override - } - - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 3.75, - disk_gb: 10, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - Float mem_gb = select_first([runtime_attr.mem_gb, default_attr.mem_gb]) - Int java_mem_mb = ceil(mem_gb * 1000 * 0.8) - - output{ - File local_bam = "~{contig}.bam" - File local_bai = "~{contig}.bam.bai" - } - - command <<< - set -Eeuo pipefail - - java -Xmx~{java_mem_mb}M -jar ${GATK_JAR} PrintReads \ - -I ~{bam_or_cram_file} \ - -L ~{contig} \ - -O ~{contig}.bam \ - -R ~{ref_fasta} \ - --gcs-project-for-requester-pays ~{project_id} - >>> - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - -# extract specific contig from BED, and sites for sample if provided, and add SVLEN to INS if header contains SVLEN column -task PreprocessBedForVapor { - input { - String prefix - String contig - String? sample_to_extract - File bed_file # first 5 columns must be chrom, start, end, name, svtype (or Vapor description). if >5 columns, use header or assume samples is 6th. Need header & SVLEN column unless already appended to INS descriptions - String sv_pipeline_docker - RuntimeAttr? 
runtime_attr_override - } - - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 3.75, - disk_gb: 10, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - - output { - File contig_bed = "~{prefix}.bed" - } - - command <<< - set -euo pipefail - python /opt/sv-pipeline/scripts/preprocess_bed_for_vapor.py \ - --contig ~{contig} \ - --bed-in ~{bed_file} \ - --bed-out ~{prefix}.bed \ - ~{"-s " + sample_to_extract} - >>> - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } - -} - -task SplitVcf { - input { - String contig - File vcf_file - String sv_pipeline_docker - RuntimeAttr? runtime_attr_override - } - - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 3.75, - disk_gb: 10, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - - output{ - File contig_vcf = "~{contig}.vcf.gz" - File contig_vcf_index = "~{contig}.vcf.gz.tbi" - } - - command <<< - if [[ ~{vcf_file} == *.gz ]] ; then - tabix -f -p vcf ~{vcf_file} - tabix -h ~{vcf_file} ~{contig} | bgzip > ~{contig}.vcf.gz - tabix -p vcf ~{contig}.vcf.gz - else - bgzip ~{vcf_file} - tabix -f -p vcf ~{vcf_file}.gz - tabix -h ~{vcf_file}.gz ~{contig} | bgzip > ~{contig}.vcf.gz - tabix -p vcf ~{contig}.vcf.gz - fi - >>> - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - -task vcf2bed { - input { - File vcf - File? vcf_index - String sv_pipeline_docker - RuntimeAttr? 
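# The per-contig split above works by bgzipping and tabix-indexing the VCF and
# then extracting one contig with "tabix -h". An equivalent two-liner with
# bcftools, shown only as a sketch (it likewise requires a bgzipped, indexed
# input; contig and file names are illustrative):
bcftools view -r chr21 -Oz -o chr21.vcf.gz in.vcf.gz
tabix -p vcf chr21.vcf.gz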
runtime_attr_override - } - - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 10, - disk_gb: 100, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - String filename = basename(vcf, ".vcf.gz") - - output { - File bed = "${filename}.bed" - } - - command <<< - - set -Eeuo pipefail - - svtk vcf2bed -i SVTYPE -i SVLEN ~{vcf} tmp1.bed - - cat \ - <(awk '{if ($5=="DEL") print}' tmp1.bed | cut -f1-5) \ - <(awk '{if ($5=="DUP") print}' tmp1.bed | cut -f1-5) \ - <(awk '{if ($5=="INV") print}' tmp1.bed | cut -f1-5) \ - > ~{filename}.bed - - paste -d '_' \ - <(awk '{if ($5=="INS") print}' tmp1.bed | cut -f1-5) \ - <(awk '{if ($5=="INS") print}' tmp1.bed | cut -f8) \ - >> ~{filename}.bed - - >>> - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - - - - - - - - diff --git a/wdl/Vapor.wdl b/wdl/Vapor.wdl index 5d0c84d32..386e8d607 100644 --- a/wdl/Vapor.wdl +++ b/wdl/Vapor.wdl @@ -1,7 +1,6 @@ version 1.0 import "Structs.wdl" -import "TasksBenchmark.wdl" as tasks10 workflow Vapor { input { @@ -34,7 +33,7 @@ workflow Vapor { scatter (contig in read_lines(contigs)) { - call tasks10.PreprocessBedForVapor { + call PreprocessBedForVapor { input: prefix = "~{prefix}.~{contig}.preprocess", contig = contig, @@ -59,7 +58,7 @@ workflow Vapor { } } - call tasks10.ConcatVapor { + call ConcatVapor { input: shard_bed_files = RunVaporWithCram.vapor, shard_plots = RunVaporWithCram.vapor_plot, @@ -74,6 +73,52 @@ workflow Vapor { } } +# extract specific contig from BED, and sites for sample if provided, and add SVLEN to INS if header contains SVLEN column +task PreprocessBedForVapor { + input { + String prefix + String contig + String? sample_to_extract + File bed_file # first 5 columns must be chrom, start, end, name, svtype (or Vapor description). if >5 columns, use header or assume samples is 6th. Need header & SVLEN column unless already appended to INS descriptions + String sv_pipeline_docker + RuntimeAttr? 
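# The awk/paste combination above emits a five-column BED (chrom, start, end,
# name, svtype) for DEL/DUP/INV records, and for INS it appends the SVLEN value
# to the svtype with an underscore so the description carries the insertion
# length. A small illustration of the INS branch on a fake svtk vcf2bed line,
# where column 8 holds SVLEN as requested by "-i SVTYPE -i SVLEN" (values are
# illustrative):
printf 'chr1\t1000\t1001\tcohort_INS_1\tINS\tsample1\tINS\t312\n' > tmp1.bed
paste -d '_' \
  <(awk '{if ($5=="INS") print}' tmp1.bed | cut -f1-5) \
  <(awk '{if ($5=="INS") print}' tmp1.bed | cut -f8)
# expected: chr1  1000  1001  cohort_INS_1  INS_312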
runtime_attr_override + } + + RuntimeAttr default_attr = object { + cpu_cores: 1, + mem_gb: 3.75, + disk_gb: 10, + boot_disk_gb: 10, + preemptible_tries: 3, + max_retries: 1 + } + + RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) + + output { + File contig_bed = "~{prefix}.bed" + } + + command <<< + set -euo pipefail + python /opt/sv-pipeline/scripts/preprocess_bed_for_vapor.py \ + --contig ~{contig} \ + --bed-in ~{bed_file} \ + --bed-out ~{prefix}.bed \ + ~{"-s " + sample_to_extract} + >>> + + runtime { + cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) + memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" + disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" + bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) + docker: sv_pipeline_docker + preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) + maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) + } +} + task RunVaporWithCram { input { String prefix @@ -136,3 +181,78 @@ task RunVaporWithCram { maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) } } + +# Merge shards after Vapor +task ConcatVapor { + input { + Array[File] shard_bed_files + Array[File] shard_plots + String prefix + Boolean? index_output + String sv_base_mini_docker + RuntimeAttr? runtime_attr_override + } + + Boolean call_tabix = select_first([index_output, true]) + String output_file="~{prefix}.bed.gz" + + # when filtering/sorting/etc, memory usage will likely go up (much of the data will have to + # be held in memory or disk while working, potentially in a form that takes up more space) + Float input_size = size(shard_bed_files, "GB") + Float compression_factor = 5.0 + RuntimeAttr runtime_default = object { + mem_gb: 2.0 + compression_factor * input_size, + disk_gb: ceil(10.0 + input_size * (2.0 + compression_factor)), + cpu_cores: 1, + preemptible_tries: 3, + max_retries: 1, + boot_disk_gb: 10 + } + RuntimeAttr runtime_override = select_first([runtime_attr_override, runtime_default]) + runtime { + memory: "~{select_first([runtime_override.mem_gb, runtime_default.mem_gb])} GB" + disks: "local-disk ~{select_first([runtime_override.disk_gb, runtime_default.disk_gb])} HDD" + cpu: select_first([runtime_override.cpu_cores, runtime_default.cpu_cores]) + preemptible: select_first([runtime_override.preemptible_tries, runtime_default.preemptible_tries]) + maxRetries: select_first([runtime_override.max_retries, runtime_default.max_retries]) + docker: sv_base_mini_docker + bootDiskSizeGb: select_first([runtime_override.boot_disk_gb, runtime_default.boot_disk_gb]) + } + + command <<< + set -eu + + zcat ~{shard_bed_files[0]} | head -n1 > header.txt + # note head -n1 stops reading early and sends SIGPIPE to zcat, + # so setting pipefail here would result in early termination + + # no more early stopping + set -o pipefail + + while read SPLIT; do + zcat $SPLIT | tail -n+2 + done < ~{write_lines(shard_bed_files)} \ + | sort -Vk1,1 -k2,2n -k3,3n \ + | cat header.txt - \ + | bgzip -c \ + > ~{output_file} + + if ~{call_tabix}; then + tabix -f -p bed ~{output_file} + else + touch ~{output_file}.tbi + fi + + mkdir ~{prefix}.plots + while read SPLIT; do + tar zxvf $SPLIT -C ~{prefix}.plots/ + done < ~{write_lines(shard_plots)} + + tar -czf ~{prefix}.plots.tar.gz ~{prefix}.plots/ + >>> + + output { + File merged_bed_file = output_file + File merged_bed_plot = 
"~{prefix}.plots.tar.gz" + } +} \ No newline at end of file diff --git a/wdl/XfBatchEffect.wdl b/wdl/XfBatchEffect.wdl deleted file mode 100644 index 810c049ea..000000000 --- a/wdl/XfBatchEffect.wdl +++ /dev/null @@ -1,674 +0,0 @@ -version 1.0 - -import "prune_add_af.wdl" as calcAF -import "batch_effect_helper.wdl" as helper -import "TasksMakeCohortVcf.wdl" as MiniTasks - -workflow XfBatchEffect { - input{ - File vcf - File vcf_idx - File sample_batch_assignments - File batches_list - File sample_pop_assignments - File excludesamples_list #empty file if need be - File famfile - File contiglist - File? par_bed - Int variants_per_shard - Int? pairwise_cutoff=2 - Int? onevsall_cutoff=2 - String prefix - File af_pcrmins_premingq - String sv_pipeline_docker - - RuntimeAttr? runtime_attr_merge_labeled_vcfs - } - Array[String] batches = read_lines(batches_list) - Array[Array[String]] contigs = read_tsv(contiglist) - - # Shard VCF per batch, compute pops-specific AFs, and convert to table of VID & AF stats - scatter ( batch in batches ) { - # Get list of samples to include & exclude per batch - call GetBatchSamplesList { - input: - vcf=vcf, - vcf_idx=vcf_idx, - batch=batch, - sample_batch_assignments=sample_batch_assignments, - probands_list=excludesamples_list, - sv_pipeline_docker=sv_pipeline_docker - } - # Prune VCF to samples - call calcAF.prune_and_add_vafs as getAFs { - input: - vcf=vcf, - vcf_idx=vcf_idx, - prefix=batch, - sample_pop_assignments=sample_pop_assignments, - prune_list=GetBatchSamplesList.exclude_samples_list, - famfile=famfile, - sv_per_shard=25000, - contiglist=contiglist, - drop_empty_records="FALSE", - par_bed=par_bed, - sv_pipeline_docker=sv_pipeline_docker - } - # Get minimal table of AF data per batch, split by ancestry - call GetFreqTable { - input: - vcf=getAFs.output_vcf, - prefix=batch, - sv_pipeline_docker=sv_pipeline_docker - } - } - - # Merge frequency results per batch into a single table of all variants with AF data across batches - call MergeFreqTables { - input: - tables=GetFreqTable.freq_data, - batches_list=batches_list, - prefix=prefix, - sv_pipeline_docker=sv_pipeline_docker - } - call MergeFreqTables as MergeFreqTables_allPops { - input: - tables=GetFreqTable.freq_data_allPops, - batches_list=batches_list, - prefix=prefix, - sv_pipeline_docker=sv_pipeline_docker - } - - # Compare frequencies before and after minGQ, and generate list of variants - # that are significantly different between the steps - call CompareFreqsPrePostMinGQPcrminus { - input: - af_pcrmins_premingq=af_pcrmins_premingq, - AF_postMinGQ_table=MergeFreqTables_allPops.merged_table, - prefix=prefix, - sv_pipeline_docker=sv_pipeline_docker - } - - # Generate matrix of correlation coefficients for all batches, by population & SVTYPE - #scatter ( pop in populations ) { - # call MakeCorrelationMatrices { - # input: - # freq_table=MergeFreqTables.merged_table, - # pop=pop, - # batches_list=batches_list, - # prefix=prefix, - # sv_pipeline_docker=sv_pipeline_docker - # } - #} - - # Make list of nonredundant pairs of batches to be evaluated - call MakeBatchPairsList { - input: - batches_list=batches_list, - prefix=prefix, - sv_pipeline_docker=sv_pipeline_docker - } - Array[Array[String]] batch_pairs = read_tsv(MakeBatchPairsList.batch_pairs_list) - - # Compute AF stats per pair of batches & determine variants with batch effects - scatter ( pair in batch_pairs ) { - call helper.check_batch_effects as check_batch_effects { - input: - freq_table=MergeFreqTables.merged_table, - batch1=pair[0], - 
batch2=pair[1], - prefix=prefix, - variants_per_shard=variants_per_shard, - sv_pipeline_docker=sv_pipeline_docker - } - } - # Collect results from pairwise batch effect detection - call MergeVariantFailureLists as merge_pairwise_checks { - input: - fail_variant_lists=check_batch_effects.batch_effect_variants, - prefix="~{prefix}.pairwise_comparisons", - sv_pipeline_docker=sv_pipeline_docker - } - - # Perform one-vs-all comparison of AFs per batch to find batch-specific sites - scatter ( batch in batches ) { - call helper.check_batch_effects as one_vs_all_comparison { - input: - freq_table=MergeFreqTables.merged_table, - batch1=batch, - batch2="ALL_OTHERS", - prefix=prefix, - variants_per_shard=variants_per_shard, - sv_pipeline_docker=sv_pipeline_docker - } - } - # Collect results from pairwise batch effect detection - call MergeVariantFailureLists as merge_one_vs_all_checks { - input: - fail_variant_lists=one_vs_all_comparison.batch_effect_variants, - prefix="~{prefix}.one_vs_all_comparisons", - sv_pipeline_docker=sv_pipeline_docker - } - - # Distill final table of variants to be reclassified - call MakeReclassificationTable { - input: - freq_table=MergeFreqTables.merged_table, - pairwise_fails=merge_pairwise_checks.fails_per_variant_all, - onevsall_fails=merge_one_vs_all_checks.fails_per_variant_all, - prefix=prefix, - pairwise_cutoff = pairwise_cutoff, - onevsall_cutoff = onevsall_cutoff, - sv_pipeline_docker=sv_pipeline_docker - } - - # Apply batch effect labels - scatter ( contig in contigs ) { - call ApplyBatchEffectLabels as apply_labels_perContig { - input: - vcf=vcf, - vcf_idx=vcf_idx, - contig=contig[0], - reclassification_table=MakeReclassificationTable.reclassification_table, - mingq_prePost_pcrminus_fails=CompareFreqsPrePostMinGQPcrminus.pcrminus_fails, - prefix="~{prefix}.~{contig[0]}", - sv_pipeline_docker=sv_pipeline_docker - } - } - call MiniTasks.ConcatVcfs as merge_labeled_vcfs { - input: - vcfs=apply_labels_perContig.labeled_vcf, - naive=true, - outfile_prefix="~{prefix}.batch_effects_labeled_merged", - sv_base_mini_docker=sv_pipeline_docker, - runtime_attr_override=runtime_attr_merge_labeled_vcfs - } - - output { - File labeled_vcf = merge_labeled_vcfs.concat_vcf - File labeled_vcf_idx = merge_labeled_vcfs.concat_vcf_idx - } -} - - -# Get list of samples to include & exclude per batch -# Always exclude probands from all batches -task GetBatchSamplesList { - input{ - File vcf - File vcf_idx - String batch - File sample_batch_assignments - File probands_list - String sv_pipeline_docker - RuntimeAttr? 
runtime_attr_override - } - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 4, - disk_gb: 50, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - command <<< - set -euo pipefail - # Get list of all samples present in VCF header - tabix -H ~{vcf} | fgrep -v "##" | cut -f10- | sed 's/\t/\n/g' | sort -Vk1,1 \ - > all_samples.list - # Get list of samples in batch - fgrep -w ~{batch} ~{sample_batch_assignments} | cut -f1 \ - | fgrep -wf - all_samples.list \ - | fgrep -wvf ~{probands_list} \ - > "~{batch}.samples.list" || true - # Get list of samples not in batch - fgrep -wv ~{batch} ~{sample_batch_assignments} | cut -f1 \ - cat - ~{probands_list} | sort -Vk1,1 | uniq \ - | fgrep -wf - all_samples.list \ - > "~{batch}.exclude_samples.list" || true - >>> - - output { - File include_samples_list = "~{batch}.samples.list" - File exclude_samples_list = "~{batch}.exclude_samples.list" - } - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - - -# Run vcf2bed and subset to just include VID, SVTYPE, SVLEN, _AC, and _AN -task GetFreqTable { - input{ - File vcf - String prefix - String sv_pipeline_docker - RuntimeAttr? runtime_attr_override - } - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 4, - disk_gb: 50, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - command <<< - set -euo pipefail - #Run vcf2bed - svtk vcf2bed \ - --info ALL \ - --no-samples \ - ~{vcf} "~{prefix}.vcf2bed.bed" - ### Create table of freqs by ancestry - #Cut to necessary columns - idxs=$( sed -n '1p' "~{prefix}.vcf2bed.bed" \ - | sed 's/\t/\n/g' \ - | awk -v OFS="\t" '{ print $1, NR }' \ - | grep -e 'name\|SVLEN\|SVTYPE\|_AC\|_AN' \ - | fgrep -v "OTH" \ - | cut -f2 \ - | paste -s -d\, || true ) - cut -f"$idxs" "~{prefix}.vcf2bed.bed" \ - | sed 's/^name/\#VID/g' \ - | gzip -c \ - > "~{prefix}.frequencies.preclean.txt.gz" - #Clean frequencies - /opt/sv-pipeline/scripts/downstream_analysis_and_filtering/clean_frequencies_table.R \ - "~{prefix}.frequencies.preclean.txt.gz" \ - "~{prefix}.frequencies.txt" - ### Create table of freqs, irrespective of ancestry - #Cut to necessary columns - idxs=$( sed -n '1p' "~{prefix}.vcf2bed.bed" \ - | sed 's/\t/\n/g' \ - | awk -v OFS="\t" '{ if ($1=="name" || $1=="SVLEN" || $1=="SVTYPE" || $1=="AC" || $1=="AN") print NR }' \ - | paste -s -d\, || true ) - cut -f"$idxs" "~{prefix}.vcf2bed.bed" \ - | sed 's/^name/\#VID/g' \ - | gzip -c \ - > "~{prefix}.frequencies.allPops.txt.gz" - >>> - - output { - File freq_data = "~{prefix}.frequencies.txt.gz" - File freq_data_allPops = "~{prefix}.frequencies.allPops.txt.gz" - } - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: 
select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - - -# Combine frequency data across batches -task MergeFreqTables { - input{ - Array[File] tables - File batches_list - String prefix - String sv_pipeline_docker - RuntimeAttr? runtime_attr_override - } - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 16, - disk_gb: 100, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - command <<< - set -euo pipefail - #Get list of batch IDs and batch table paths - while read batch; do - echo "$batch" - find ./ -name "$batch.frequencies*txt.gz" - done < ~{batches_list} | paste - - \ - > input.list - #Make sure all input files have the same number of lines - while read batch file; do - zcat "$file" | wc -l - done < input.list > nlines.list - nlines=$( sort nlines.list | uniq | wc -l ) - if [ "$nlines" -gt 1 ]; then - echo "AT LEAST ONE INPUT FILE HAS A DIFFERENT NUMBER OF LINES" - exit 0 - fi - #Prep files for paste joining - echo "PREPPING FILES FOR MERGING" - while read batch file; do - #Header - zcat "$file" | sed -n '1p' | cut -f1-3 - #Body - zcat "$file" | sed '1d' \ - | sort -Vk1,1 \ - | cut -f1-3 - done < <( sed -n '1p' input.list ) \ - > header.txt - while read batch file; do - for wrapper in 1; do - #Header - zcat "$file" | sed -n '1p' \ - | cut -f4- | sed 's/\t/\n/g' \ - | awk -v batch="$batch" '{ print $1"."batch }' \ - | paste -s - #Body - zcat "$file" | sed '1d' \ - | sort -Vk1,1 \ - | cut -f4- - done > "$batch.prepped.txt" - done < input.list - #Join files with simple paste - paste \ - header.txt \ - $( awk -v ORS=" " '{ print $1".prepped.txt" }' input.list ) \ - | gzip -c \ - > "~{prefix}.merged_AF_table.txt.gz" - >>> - - output { - File merged_table = "~{prefix}.merged_AF_table.txt.gz" - } - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - - -# Compare -task CompareFreqsPrePostMinGQPcrminus { - input{ - File af_pcrmins_premingq - File AF_postMinGQ_table - String prefix - String sv_pipeline_docker - RuntimeAttr? runtime_attr_override - } - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 8, - disk_gb: 30, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - command <<< - set -euo pipefail - /opt/sv-pipeline/scripts/downstream_analysis_and_filtering/compare_freqs_pre_post_minGQ.PCRMinus_only.R \ - ~{af_pcrmins_premingq} \ - ~{AF_postMinGQ_table} \ - ./ \ - "~{prefix}." 
- >>> - - output { - File pcrminus_fails = "~{prefix}.PCRMINUS_minGQ_AF_prePost_fails.VIDs.list" - File minGQ_prePost_comparison_data = "~{prefix}.minGQ_AF_prePost_comparison.data.txt.gz" - File minGQ_prePost_comparison_plot = "~{prefix}.minGQ_AF_prePost_comparison.plot.png" - } - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - - -# Calculate & plot cross-batch correlation coefficient matrixes -task MakeCorrelationMatrices { - input{ - File freq_table - String pop - File batches_list - String prefix - String sv_pipeline_docker - RuntimeAttr? runtime_attr_override - } - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 8, - disk_gb: 50, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - command <<< - set -euo pipefail - /opt/sv-pipeline/scripts/downstream_analysis_and_filtering/correlate_batches_singlePop.R \ - ~{batches_list} \ - ~{freq_table} \ - "~{pop}" \ - "~{prefix}.~{pop}" - >>> - output { - Array[File] corr_matrixes = glob("~{prefix}.~{pop}.*.R2_matrix.txt") - Array[File] heat_maps = glob("~{prefix}.~{pop}.*heatmap*.pdf") - Array[File] dot_plots = glob("~{prefix}.~{pop}.*perBatch_R2_sina_plot.pdf") - } - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - - -# Generate list of all pairs of batches to be compared -task MakeBatchPairsList { - input{ - File batches_list - String prefix - String sv_pipeline_docker - RuntimeAttr? 
runtime_attr_override - } - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 4, - disk_gb: 10, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - command <<< - set -euo pipefail - /opt/sv-pipeline/scripts/downstream_analysis_and_filtering/make_batch_pairs_list.R \ - ~{batches_list} \ - "~{prefix}.nonredundant_batch_pairs.txt" - >>> - - output { - File batch_pairs_list = "~{prefix}.nonredundant_batch_pairs.txt" - } - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - - -# Merge lists of batch effect checks and count total number of times each variant failed -task MergeVariantFailureLists { - input{ - Array[File] fail_variant_lists - String prefix - String sv_pipeline_docker - RuntimeAttr? runtime_attr_override - } - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 4, - disk_gb: 10, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - command <<< - set -euo pipefail - #Write list of paths to all batch effect variant lists - #Get master list of PCR+ to PCR+ failures #removed from the PCR- only projects - #Get master list of PCR- to PCR- failures #removed from the PCR- only projects - #Get master list of PCR+ to PCR- failures #removed from the PCR- only projects - #Get master list of all possible failures - cat ~{write_lines(fail_variant_lists)} \ - | xargs -I {} cat {} \ - | sort -Vk1,1 | uniq -c \ - | awk -v OFS="\t" '{ print $2, $1 }' \ - > "~{prefix}.all.failures.txt" || true - >>> - - output { - File fails_per_variant_all = "~{prefix}.all.failures.txt" - } - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - - -# Consolidate all batch effect check results into a single table with reclassification per variant -task MakeReclassificationTable { - input{ - File freq_table - File pairwise_fails - File onevsall_fails - String prefix - Int? pairwise_cutoff - Int? onevsall_cutoff - String sv_pipeline_docker - RuntimeAttr? 
runtime_attr_override - } - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 8, - disk_gb: 10, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - command <<< - set -euo pipefail - /opt/sv-pipeline/scripts/downstream_analysis_and_filtering/make_batch_effect_reclassification_table.PCRMinus_only.R \ - ~{freq_table} \ - ~{pairwise_fails} \ - ~{onevsall_fails} \ - "~{prefix}.batch_effect_reclassification_table.txt" \ - ~{pairwise_cutoff} \ - ~{onevsall_cutoff} - >>> - - output { - File reclassification_table = "~{prefix}.batch_effect_reclassification_table.txt" - } - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - - -# Apply batch effect labels to VCF -task ApplyBatchEffectLabels { - input{ - File vcf - File vcf_idx - String contig - File reclassification_table - File mingq_prePost_pcrminus_fails - String prefix - String sv_pipeline_docker - RuntimeAttr? runtime_attr_override - } - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 4, - disk_gb: 50, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - command <<< - set -euo pipefail - tabix -h ~{vcf} ~{contig} \ - | /opt/sv-pipeline/scripts/downstream_analysis_and_filtering/label_batch_effects.PCRMinus_only.py \ - --unstable-af-pcrminus ~{mingq_prePost_pcrminus_fails} \ - stdin \ - ~{reclassification_table} \ - stdout \ - | bgzip -c \ - > "~{prefix}.batch_effects_labeled.vcf.gz" - tabix -p vcf -f "~{prefix}.batch_effects_labeled.vcf.gz" - >>> - - output { - File labeled_vcf = "~{prefix}.batch_effects_labeled.vcf.gz" - File labeled_vcf_idx = "~{prefix}.batch_effects_labeled.vcf.gz.tbi" - } - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} diff --git a/wdl/batch_effect_helper.wdl b/wdl/batch_effect_helper.wdl deleted file mode 100644 index a8bf397cc..000000000 --- a/wdl/batch_effect_helper.wdl +++ /dev/null @@ -1,205 +0,0 @@ -version 1.0 - -import "Structs.wdl" - -workflow check_batch_effects { - input{ - File freq_table - String batch1 - String batch2 - String prefix - Int variants_per_shard - String sv_pipeline_docker - } - # Shard frequency table - call ShardTable { - input: - freq_table=freq_table, - variants_per_shard=variants_per_shard, - sv_pipeline_docker=sv_pipeline_docker - } - - # Scatter over shards and compute AF correlations for each variant - scatter ( shard in ShardTable.shards ) { - call CompareBatches { - input: - freq_table=shard, 
- batch1=batch1, - batch2=batch2, - prefix=prefix, - sv_pipeline_docker=sv_pipeline_docker - } - } - - # Combine shards, perform bonferroni correction to determine significant batch effects, and plot AF correlation scatter - call CombineShards { - input: - freq_tables=CompareBatches.results, - batch1=batch1, - batch2=batch2, - prefix=prefix, - sv_pipeline_docker=sv_pipeline_docker - } - - # Outputs - output { - File comparison_table = CombineShards.merged_table - File batch_effect_variants = CombineShards.batch_effect_variants - File scatterplots_tarball = CombineShards.correlation_scatterplots_tarball - } -} - - -# Shard a frequency table into an even number of evenly sized shards -task ShardTable { - input{ - File freq_table - Int variants_per_shard - String sv_pipeline_docker - RuntimeAttr? runtime_attr_override - } - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 4, - disk_gb: 10, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - command <<< - set -euo pipefail - #Split variant lines - zcat ~{freq_table} | sed '1d' | \ - split -l ~{variants_per_shard} --numeric-suffixes=00001 -a 5 /dev/stdin freq_table_shard_ || true - #Add header & gzip each shard - zcat ~{freq_table} | sed -n '1p' > header.txt - maxshard=$( find / -name "freq_table_shard_*" | awk -v FS="_" '{ print $NF }' \ - | sort -Vrk1,1 | sed -n '1p' || true ) - for i in $( seq -w 00001 "$maxshard" ); do - cat header.txt "freq_table_shard_$i" \ - | gzip -c \ - > "freq_table_shard_$i.txt.gz" || true - done - >>> - - output { - Array[File] shards = glob("freq_table_shard_*.txt.gz") - } - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - - -# Compare AF stats per variant between a pair of batches -task CompareBatches { - input{ - File freq_table - String batch1 - String batch2 - String prefix - String sv_pipeline_docker - RuntimeAttr? 
runtime_attr_override - } - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 4, - disk_gb: 10, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - command <<< - set -euo pipefail - /opt/sv-pipeline/scripts/downstream_analysis_and_filtering/find_batch_effects.shard_helper.R \ - ~{freq_table} \ - "~{batch1}" \ - "~{batch2}" \ - "~{prefix}.~{batch1}_vs_~{batch2}.results.txt" - gzip "~{prefix}.~{batch1}_vs_~{batch2}.results.txt" - >>> - - output { - File results = "~{prefix}.~{batch1}_vs_~{batch2}.results.txt.gz" - } - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - - -# Merge sharded comparison results and perform analysis for batch effects -task CombineShards { - input{ - Array[File] freq_tables - String batch1 - String batch2 - String prefix - String sv_pipeline_docker - RuntimeAttr? runtime_attr_override - } - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 4, - disk_gb: 10, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - command <<< - set -euo pipefail - #Write header - zcat ~{freq_tables[0]} | sed -n '1p' > header.txt || true - #Iterate over files and cat - while read file; do - zcat "$file" | sed '1d' - done < ~{write_lines(freq_tables)} \ - | cat header.txt - \ - | gzip -c \ - > "~{prefix}.~{batch1}_vs_~{batch2}.AF_comparison_table.txt.gz" || true - #Analyze - mkdir "~{batch1}_vs_~{batch2}" - /opt/sv-pipeline/scripts/downstream_analysis_and_filtering/find_batch_effects.R \ - "~{prefix}.~{batch1}_vs_~{batch2}.AF_comparison_table.txt.gz" \ - "~{batch1}" \ - "~{batch2}" \ - "~{batch1}_vs_~{batch2}/~{prefix}" ||true - gzip -f "~{batch1}_vs_~{batch2}/~{prefix}.~{batch1}_vs_~{batch2}.freq_table_wBonferroni.txt" - tar -czvf "~{batch1}_vs_~{batch2}.tar.gz" \ - "~{batch1}_vs_~{batch2}" - >>> - - output { - File merged_table = "~{batch1}_vs_~{batch2}/~{prefix}.~{batch1}_vs_~{batch2}.freq_table_wBonferroni.txt.gz" - File batch_effect_variants = "~{batch1}_vs_~{batch2}/~{prefix}.~{batch1}_vs_~{batch2}.batch_effect_variants.txt" - File correlation_scatterplots_tarball = "~{batch1}_vs_~{batch2}.tar.gz" - } - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - diff --git a/wdl/prune_add_af.wdl b/wdl/prune_add_af.wdl deleted file mode 100644 index 74fa3038e..000000000 --- a/wdl/prune_add_af.wdl +++ /dev/null @@ -1,120 +0,0 @@ -version 1.0 - - -import "CalcAF.wdl" as calcAF -import 
"TasksMakeCohortVcf.wdl" as MiniTasks - -workflow prune_and_add_vafs { - input { - File vcf - File vcf_idx - String prefix - String sv_pipeline_docker - - File? sample_pop_assignments #Two-column file with sample ID & pop assignment. "." for pop will ignore sample - File? prune_list #List of samples to be excluded from the output vcf - File? famfile #Used for M/F AF calculations - File? par_bed #Used to mark hemizygous males on chrX/Y - Int sv_per_shard - File contiglist - String? drop_empty_records - - } - Array[Array[String]] contigs=read_tsv(contiglist) - #Iterate over chromosomes - scatter (contig in contigs) { - #Prune VCF - call PruneVcf { - input: - vcf=vcf, - vcf_idx=vcf_idx, - contig=contig[0], - prune_list=prune_list, - prefix=prefix, - sv_pipeline_docker=sv_pipeline_docker - } - #Compute AC, AN, and AF per population & sex combination - call calcAF.CalcAF as getAFs { - input: - vcf=PruneVcf.pruned_vcf, - vcf_idx=PruneVcf.pruned_vcf_idx, - sv_per_shard=sv_per_shard, - prefix=prefix, - sample_pop_assignments=sample_pop_assignments, - famfile=famfile, - par_bed=par_bed, - drop_empty_records=drop_empty_records, - sv_pipeline_docker=sv_pipeline_docker - } - } - - #Merge pruned VCFs with allele info - call MiniTasks.ConcatVcfs as concat_vcfs { - input: - vcfs=getAFs.vcf_wAFs, - naive=true, - outfile_prefix="~{prefix}.pruned_wAFs", - sv_base_mini_docker=sv_pipeline_docker - } - - output { - File output_vcf = concat_vcfs.concat_vcf - File output_vcf_idx = concat_vcfs.concat_vcf_idx - } -} - - -#Shard vcf into single chromosome shards & drop pruned samples -task PruneVcf { - input{ - File vcf - File vcf_idx - String contig - File? prune_list - String prefix - String sv_pipeline_docker - RuntimeAttr? runtime_attr_override - } - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 4, - disk_gb: 250, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - command <<< - set -euo pipefail - #Tabix chromosome of interest - tabix -h ~{vcf} ~{contig} | bgzip -c > ~{contig}.vcf.gz - #Get column indexes corresponding to samples to drop, if any exist - if ~{defined(prune_list)}; then - dropidx=$( zfgrep "#" ~{contig}.vcf.gz | fgrep -v "##" \ - | sed 's/\t/\n/g' | awk -v OFS="\t" '{ print NR, $1 }' \ - | fgrep -wf ~{prune_list} | cut -f1 | paste -s -d, ) - zcat ~{contig}.vcf.gz \ - | cut --complement -f"$dropidx" \ - | bgzip -c \ - > "~{prefix}.~{contig}.pruned.vcf.gz" - else - cp "~{contig}.vcf.gz" "~{prefix}.~{contig}.pruned.vcf.gz" - fi - tabix -f "~{prefix}.~{contig}.pruned.vcf.gz" - >>> - - output { - File pruned_vcf = "~{prefix}.~{contig}.pruned.vcf.gz" - File pruned_vcf_idx = "~{prefix}.~{contig}.pruned.vcf.gz.tbi" - } - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} diff --git a/wdl/qcstructs.wdl b/wdl/qcstructs.wdl deleted file mode 100644 index a801344fb..000000000 --- a/wdl/qcstructs.wdl +++ /dev/null @@ -1,10 +0,0 @@ -version 1.0 - -struct RuntimeAttr { - Float? mem_gb - Int? cpu_cores - Int? 
disk_gb
-  Int? boot_disk_gb
-  Int? preemptible_tries
-  Int? max_retries
-}
\ No newline at end of file
diff --git a/website/docs/gs/input_files.md b/website/docs/gs/input_files.md
index aafd031f1..de356b553 100644
--- a/website/docs/gs/input_files.md
+++ b/website/docs/gs/input_files.md
@@ -45,16 +45,14 @@ their sample IDs according to the following requirements.
   - Be a substring of another sample ID in the same cohort
   - Contain any of the following substrings: `chr`, `name`, `DEL`, `DUP`, `CPX`, `CHROM`
 
-The same requirements apply to family IDs in the PED file, as well as batch IDs and the cohort ID provided as workflow inputs.
-
-Users should set sample IDs in [GatherSampleEvidence](/docs/modules/gse) with the `sample_id` input, which needs not match
-the sample name defined in the BAM/CRAM header. `GetSampleID.wdl` can be used to fetch BAM sample IDs and also generates a set
-of alternate IDs that are considered safe for this pipeline. Alternatively,
-[this script](https://github.com/talkowski-lab/gnomad_sv_v3/blob/master/sample_id/convert_sample_ids.py)
-transforms a list of sample IDs to fit these requirements.
-
-Sample IDs can be replaced again in [GatherBatchEvidence](/docs/modules/gbe). To do so, set the parameter
-`rename_samples = True` and provide updated sample IDs via the `samples` parameter.
+The same requirements apply to family IDs in the PED file, as well as batch IDs and the cohort ID provided as workflow
+inputs. [This script](https://github.com/broadinstitute/gatk-sv/blob/main/scripts/inputs/convert_sample_ids.py)
+can be used to transform a list of sample IDs to meet safe ID requirements.
+
+Users should assign sample IDs in [GatherSampleEvidence](/docs/modules/gse) with the `sample_id` input, which need not
+match the sample name defined in the BAM/CRAM header. Alternatively, sample IDs can be replaced again in
+[GatherBatchEvidence](/docs/modules/gbe) by setting the parameter `rename_samples = True` and providing updated
+sample IDs via the `samples` parameter.
 
 Note that following inputs will need to be updated with the transformed sample IDs:
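The documentation hunk above describes two constraints on sample IDs (no forbidden substrings; no ID that is a substring of another ID in the same cohort) and points to `scripts/inputs/convert_sample_ids.py` for transforming IDs that violate them. As a rough illustration only — not the pipeline's script, and covering only the two rules visible in this hunk — a pre-flight check could look like the following Python sketch; the script name `check_sample_ids.py` and all function names are hypothetical.

```python
#!/usr/bin/env python3
"""Illustrative pre-flight check for the sample ID rules quoted above.

This is NOT the GATK-SV convert_sample_ids.py script; it only checks the two
rules visible in the documentation hunk (forbidden substrings, and no sample
ID being a substring of another ID in the same cohort).
"""
import sys

# Substrings that must not appear in any sample ID, per the docs above.
FORBIDDEN_SUBSTRINGS = ("chr", "name", "DEL", "DUP", "CPX", "CHROM")


def check_sample_ids(sample_ids):
    """Return human-readable violations for a cohort's sample IDs."""
    violations = []
    for sid in sample_ids:
        for sub in FORBIDDEN_SUBSTRINGS:
            if sub in sid:
                violations.append(f"{sid}: contains forbidden substring '{sub}'")
        # No ID may be a substring of any *other* ID in the cohort.
        if any(sid in other for other in sample_ids if other != sid):
            violations.append(f"{sid}: is a substring of another sample ID")
    return violations


if __name__ == "__main__":
    # One sample ID per line on stdin, e.g.:  cut -f2 cohort.ped | python check_sample_ids.py
    ids = [line.strip() for line in sys.stdin if line.strip()]
    problems = check_sample_ids(ids)
    for problem in problems:
        print(problem, file=sys.stderr)
    sys.exit(1 if problems else 0)
```

In practice the repository's `convert_sample_ids.py` should be preferred, since per the documentation it transforms the IDs to meet the safe-ID requirements rather than merely flagging violations.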