Skip to content

Commit

Permalink
Docker untangling [VS-1279] (#8805)
Browse files Browse the repository at this point in the history
  • Loading branch information
mcovarr authored May 2, 2024
1 parent 32334d3 commit 9768fb6
Show file tree
Hide file tree
Showing 8 changed files with 101 additions and 35 deletions.
10 changes: 8 additions & 2 deletions .dockstore.yml
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,6 @@ workflows:
- master
- ah_var_store
- EchoCallset
- rc-vs-903-Konrads-changes
tags:
- /.*/
- name: GvsCreateVATFilesFromBigQuery
Expand Down Expand Up @@ -328,7 +327,7 @@ workflows:
branches:
- master
- ah_var_store
- rc-VS-1282-and-only-write-variant-mt
- vs_1279_docker_untangling
tags:
- /.*/
- name: GvsIngestTieout
Expand Down Expand Up @@ -459,3 +458,10 @@ workflows:
- master
- ah_var_store
- EchoCallset
- name: GvsTieoutPgenToVcf
subclass: WDL
primaryDescriptorPath: /scripts/variantstore/wdl/GvsTieoutPgenToVcf.wdl
filters:
branches:
- ah_var_store
- EchoCallset
9 changes: 5 additions & 4 deletions scripts/variantstore/wdl/GvsExtractCallsetPgenMerged.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ workflow GvsExtractCallsetPgenMerged {
# set to "NONE" if all the reference data was loaded into GVS in GvsImportGenomes
String drop_state = "NONE"

File interval_list = "gs://gcp-public-data--broad-references/hg38/v0/wgs_calling_regions.hg38.noCentromeres.noTelomeres.interval_list"
File interval_list
File interval_weights_bed = "gs://gvs_quickstart_storage/weights/gvs_full_vet_weights_1kb_padded_orig.bed"

String? variants_docker
Expand All @@ -62,10 +62,10 @@ workflow GvsExtractCallsetPgenMerged {
Boolean write_cost_to_db = true

# Merge
String plink_docker
String? plink_docker
}

if (!defined(git_hash) || !defined(variants_docker) || !defined(cloud_sdk_docker) || !defined(gatk_docker)) {
if (!defined(git_hash) || !defined(variants_docker) || !defined(cloud_sdk_docker) || !defined(gatk_docker) || !defined(plink_docker)) {
call Utils.GetToolVersions {
input:
git_branch_or_tag = git_branch_or_tag,
Expand All @@ -76,6 +76,7 @@ workflow GvsExtractCallsetPgenMerged {
String effective_variants_docker = select_first([variants_docker, GetToolVersions.variants_docker])
String effective_gatk_docker = select_first([gatk_docker, GetToolVersions.gatk_docker])
String effective_git_hash = select_first([git_hash, GetToolVersions.git_hash])
String effective_plink_docker = select_first([plink_docker, GetToolVersions.plink_docker])

call Extract.GvsExtractCallsetPgen {
input:
Expand Down Expand Up @@ -137,7 +138,7 @@ workflow GvsExtractCallsetPgenMerged {
pgen_file_list = SplitFilesByChromosome.pgen_lists[i],
pvar_file_list = SplitFilesByChromosome.pvar_lists[i],
psam_file_list = SplitFilesByChromosome.psam_lists[i],
plink_docker = plink_docker,
plink_docker = effective_plink_docker,
output_file_base_name = "~{output_file_base_name}.${contig}",
merge_disk_size = 1024,
split_count = split_count,
Expand Down
5 changes: 3 additions & 2 deletions scripts/variantstore/wdl/GvsUtils.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -72,11 +72,12 @@ task GetToolVersions {
# GVS generally uses the smallest `alpine` version of the Google Cloud SDK as it suffices for most tasks, but
# there are a handful of tasks that require the larger GNU libc-based `slim`.
String cloud_sdk_slim_docker = "gcr.io/google.com/cloudsdktool/cloud-sdk:435.0.0-slim"
String variants_docker = "us.gcr.io/broad-dsde-methods/variantstore:2024-04-23-alpine-92a8b296e"
String gatk_docker = "us.gcr.io/broad-dsde-methods/broad-gatk-snapshots:varstore_2024_04_30_74189ec2b"
String variants_docker = "us-central1-docker.pkg.dev/broad-dsde-methods/gvs/variants:2024-05-02-alpine-778d8a77294d"
String gatk_docker = "us.gcr.io/broad-dsde-methods/broad-gatk-snapshots:varstore_2024_05_02_ccc82b8"
String variants_nirvana_docker = "us.gcr.io/broad-dsde-methods/variantstore:nirvana_2022_10_19"
String real_time_genomics_docker = "docker.io/realtimegenomics/rtg-tools:latest"
String gotc_imputation_docker = "us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.5-1.10.2-0.1.16-1649948623"
String plink_docker = "us-central1-docker.pkg.dev/broad-dsde-methods/gvs/plink2:2024-05-02-slim-a0a65f52cc0e"

String workspace_bucket = read_string(workspace_bucket_output)
String workspace_id = read_string(workspace_id_output)
Expand Down
48 changes: 34 additions & 14 deletions scripts/variantstore/wdl/extract/build_docker.sh
Original file line number Diff line number Diff line change
@@ -1,29 +1,43 @@
set -o errexit -o nounset -o pipefail

usage() {
echo "
USAGE: ./build_docker.sh
Generate a tag suitable for publication of a Variants Docker image.
Tags will be of the form <ISO 8601 Date>-alpine-<short git hash>.
Build a Variants Docker image with an appropriate tag and push to GAR.
The repo name will be 'us-central1-docker.pkg.dev/broad-dsde-methods/gvs' and the image name will be 'variants'.
Tags will be of the form <ISO 8601 Date>-alpine-<Docker image ID>.
e.g. 2023-10-10-alpine-f000ba44
e.g. 2024-04-22-alpine-f000ba44
"
exit 1
}

TAG=$(python3 ./build_docker_tag.py $*)
if [[ $? -ne 0 ]]
if [[ $# -ne 0 ]]
then
usage
fi

BASE_REPO="broad-dsde-methods/variantstore"
REPO_WITH_TAG="${BASE_REPO}:${TAG}"
GCR_TAG="us.gcr.io/${REPO_WITH_TAG}"
# Write the full Docker image ID to a file. This will look something like:
# sha256:5286e46648c595295dcb58a4cc2ec0b8893c9f26d7d49393908e5ae6d4dea188
docker build . --iidfile idfile.txt
FULL_IMAGE_ID=$(cat idfile.txt)

# Take the slice of this full Docker image ID that corresponds with the output of `docker images`:
IMAGE_ID=${FULL_IMAGE_ID:7:12}

# The Variants Docker image is alpine-based.
IMAGE_TYPE="alpine"

docker build . -t "${REPO_WITH_TAG}"
# Build the image tag using the image type and Docker image ID:
TAG=$(python3 ./build_docker_tag.py --image-id "${IMAGE_ID}" --image-type "${IMAGE_TYPE}")

# Run unit tests before pushing to GCR.
BASE_REPO="broad-dsde-methods/gvs"
REPO_WITH_TAG="${BASE_REPO}/variants:${TAG}"
docker tag "${IMAGE_ID}" "${REPO_WITH_TAG}"

# Run unit tests before pushing.
set +o errexit
fail=0
for test in test_*.py
Expand All @@ -36,13 +50,19 @@ do
done

if [ $fail -ne 0 ]; then
echo "One or more unit test has failed, exiting."
echo "One or more unit tests have failed, exiting."
exit $fail
fi

set -o errexit

docker tag "${REPO_WITH_TAG}" "${GCR_TAG}"
docker push "${GCR_TAG}"
GAR_TAG="us-central1-docker.pkg.dev/${REPO_WITH_TAG}"
docker tag "${REPO_WITH_TAG}" "${GAR_TAG}"

# Docker must be configured for GAR before pushes will work:
# gcloud auth configure-docker us-central1-docker.pkg.dev
docker push "${GAR_TAG}"

echo "Docker image pushed to \"${GAR_TAG}\""

echo "docker image pushed to \"${GCR_TAG}\""
rm idfile.txt
12 changes: 3 additions & 9 deletions scripts/variantstore/wdl/extract/build_docker_tag.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,13 @@ def build_tag(args):
current_date = datetime.now()
date_string = current_date.strftime('%Y-%m-%d')

if not args.dummy_testing_hash:
proc = run(["git", "rev-parse", "--short", "HEAD"], stdout=subprocess.PIPE)
git_hash = proc.stdout.rstrip().decode('utf-8')
else:
# Do not actually try to run git during testing, the .git directory is not mounted into the container.
git_hash = args.dummy_testing_hash
return f"{date_string}-alpine-{git_hash}"
return f"{date_string}-{args.image_type}-{args.image_id}"


def build_argument_parser():
parser = argparse.ArgumentParser(allow_abbrev=False, description='Build a tag for Variants Docker image')
parser.add_argument('-d', '--dummy-testing-hash', default=None,
help='Dummy short git hash to return during testing')
parser.add_argument('-i', '--image-id', required=True, help='Docker image ID')
parser.add_argument('-t', '--image-type', required=True, help='Docker image type, should be "alpine" or "slim".')
return parser


Expand Down
7 changes: 4 additions & 3 deletions scripts/variantstore/wdl/extract/plink_docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
FROM google/cloud-sdk:448.0.0
FROM google/cloud-sdk:472.0.0-slim

ADD https://s3.amazonaws.com/plink2-assets/alpha4/plink2_linux_avx2_20230922.zip plink2.zip
# Use the stable URL to plink2 from the pgen-jni project.
ADD https://github.com/broadinstitute/pgen-jni/raw/de500ad710ce70a8171fd3d0806857a1f36d8a2e/plink2_assets/plink2_linux_x86_64_20240318.zip plink2.zip

RUN apt-get update && \
apt-get upgrade -y && \
Expand All @@ -10,4 +11,4 @@ RUN apt-get update && \
RUN unzip plink2.zip -d /usr/local/bin &&\
rm plink2.zip

ENV PATH /usr/local/bin/plink2:$PATH
ENV PATH /usr/local/bin/plink2:$PATH
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Build the PLINK 2 Docker image from the Dockerfile in the current directory,
# derive a tag from the resulting Docker image ID, and push the image to GAR.
#
# Run this from the directory that holds the PLINK Dockerfile: the build context
# is `.` and the tag helper is referenced as `../build_docker_tag.py`.
# Takes no arguments; any argument prints the usage text and exits with status 1.
set -o errexit -o nounset -o pipefail

usage() {
    echo "
USAGE: ./build_plink_docker.sh
Build a PLINK 2 (https://www.cog-genomics.org/plink/2.0/) Docker image with an appropriate tag and push to GAR.
The repo name will be 'us-central1-docker.pkg.dev/broad-dsde-methods/gvs' and the image name will be 'plink2'.
Tags will be of the form <ISO 8601 Date>-slim-<Docker image ID>.
e.g. 2024-04-22-slim-f000ba44
"
    exit 1
}

if [[ $# -ne 0 ]]
then
    usage
fi

# Remove the image ID file on every exit path. With errexit enabled, a failed
# build, tag or push would otherwise leave a stale idfile.txt behind.
trap 'rm -f idfile.txt' EXIT

# Write the full Docker image ID ("sha256:<64 hex digits>") to a file.
docker build . --iidfile idfile.txt

FULL_IMAGE_ID=$(cat idfile.txt)

# Strip the "sha256:" algorithm prefix explicitly rather than by fixed offset,
# then keep the first 12 characters of the digest as the short image ID.
IMAGE_ID=${FULL_IMAGE_ID#sha256:}
IMAGE_ID=${IMAGE_ID:0:12}

# The PLINK image is tagged as a "slim" image.
IMAGE_TYPE="slim"

# Build the image tag from the image type and Docker image ID.
TAG=$(python3 ../build_docker_tag.py --image-id "${IMAGE_ID}" --image-type "${IMAGE_TYPE}")

BASE_REPO="broad-dsde-methods/gvs"
REPO_WITH_TAG="${BASE_REPO}/plink2:${TAG}"
docker tag "${IMAGE_ID}" "${REPO_WITH_TAG}"

GAR_TAG="us-central1-docker.pkg.dev/${REPO_WITH_TAG}"
docker tag "${REPO_WITH_TAG}" "${GAR_TAG}"

# Docker must be configured for GAR before pushes will work:
# gcloud auth configure-docker us-central1-docker.pkg.dev
docker push "${GAR_TAG}"

echo "Docker image pushed to \"${GAR_TAG}\""
2 changes: 1 addition & 1 deletion scripts/variantstore/wdl/extract/test_build_docker_tag.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,6 @@ def setUp(self) -> None:
def test_tag(self):
tag_re = re.compile(r"^20\d{2}-\d{2}-\d{2}-alpine-f00ba4ba5$")

args = self.argument_parser.parse_args(['--dummy-testing-hash', 'f00ba4ba5'])
args = self.argument_parser.parse_args(['--image-id', 'f00ba4ba5', '--image-type', 'alpine'])
tag = build_tag(args)
self.assertTrue(tag_re.match(tag))

0 comments on commit 9768fb6

Please sign in to comment.