Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Docker untangling [VS-1279] #8805

Merged
merged 11 commits into from
May 2, 2024
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions .dockstore.yml
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,6 @@ workflows:
- master
- ah_var_store
- EchoCallset
- rc-vs-903-Konrads-changes
tags:
- /.*/
- name: GvsCreateVATFilesFromBigQuery
Expand Down Expand Up @@ -328,7 +327,7 @@ workflows:
branches:
- master
- ah_var_store
- rc-VS-1282-and-only-write-variant-mt
- vs_1279_docker_untangling
tags:
- /.*/
- name: GvsIngestTieout
Expand Down Expand Up @@ -459,3 +458,10 @@ workflows:
- master
- ah_var_store
- EchoCallset
- name: GvsTieoutPgenToVcf
subclass: WDL
primaryDescriptorPath: /scripts/variantstore/wdl/GvsTieoutPgenToVcf.wdl
filters:
branches:
- ah_var_store
- EchoCallset
9 changes: 5 additions & 4 deletions scripts/variantstore/wdl/GvsExtractCallsetPgenMerged.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ workflow GvsExtractCallsetPgenMerged {
# set to "NONE" if all the reference data was loaded into GVS in GvsImportGenomes
String drop_state = "NONE"

File interval_list = "gs://gcp-public-data--broad-references/hg38/v0/wgs_calling_regions.hg38.noCentromeres.noTelomeres.interval_list"
File interval_list
File interval_weights_bed = "gs://gvs_quickstart_storage/weights/gvs_full_vet_weights_1kb_padded_orig.bed"

String? variants_docker
Expand All @@ -62,10 +62,10 @@ workflow GvsExtractCallsetPgenMerged {
Boolean write_cost_to_db = true

# Merge
String plink_docker
String? plink_docker
}

if (!defined(git_hash) || !defined(variants_docker) || !defined(cloud_sdk_docker) || !defined(gatk_docker)) {
if (!defined(git_hash) || !defined(variants_docker) || !defined(cloud_sdk_docker) || !defined(gatk_docker) || !defined(plink_docker)) {
call Utils.GetToolVersions {
input:
git_branch_or_tag = git_branch_or_tag,
Expand All @@ -76,6 +76,7 @@ workflow GvsExtractCallsetPgenMerged {
String effective_variants_docker = select_first([variants_docker, GetToolVersions.variants_docker])
String effective_gatk_docker = select_first([gatk_docker, GetToolVersions.gatk_docker])
String effective_git_hash = select_first([git_hash, GetToolVersions.git_hash])
String effective_plink_docker = select_first([plink_docker, GetToolVersions.plink_docker])

call Extract.GvsExtractCallsetPgen {
input:
Expand Down Expand Up @@ -137,7 +138,7 @@ workflow GvsExtractCallsetPgenMerged {
pgen_file_list = SplitFilesByChromosome.pgen_lists[i],
pvar_file_list = SplitFilesByChromosome.pvar_lists[i],
psam_file_list = SplitFilesByChromosome.psam_lists[i],
plink_docker = plink_docker,
plink_docker = effective_plink_docker,
output_file_base_name = "~{output_file_base_name}.${contig}",
merge_disk_size = 1024,
split_count = split_count,
Expand Down
5 changes: 3 additions & 2 deletions scripts/variantstore/wdl/GvsUtils.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -72,11 +72,12 @@ task GetToolVersions {
# GVS generally uses the smallest `alpine` version of the Google Cloud SDK as it suffices for most tasks, but
# there are a handful of tasks that require the larger GNU libc-based `slim`.
String cloud_sdk_slim_docker = "gcr.io/google.com/cloudsdktool/cloud-sdk:435.0.0-slim"
String variants_docker = "us.gcr.io/broad-dsde-methods/variantstore:2024-04-23-alpine-92a8b296e"
String gatk_docker = "us.gcr.io/broad-dsde-methods/broad-gatk-snapshots:varstore_2024_04_30_74189ec2b"
String variants_docker = "us-central1-docker.pkg.dev/broad-dsde-methods/gvs/variants:2024-05-01-alpine-778d8a77294d"
String gatk_docker = "us.gcr.io/broad-dsde-methods/broad-gatk-snapshots:varstore_2024_05_01_f2588d9"
String variants_nirvana_docker = "us.gcr.io/broad-dsde-methods/variantstore:nirvana_2022_10_19"
String real_time_genomics_docker = "docker.io/realtimegenomics/rtg-tools:latest"
String gotc_imputation_docker = "us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.5-1.10.2-0.1.16-1649948623"
String plink_docker = "us-central1-docker.pkg.dev/broad-dsde-methods/gvs/plink2:2024-05-01-slim-a0a65f52cc0e"

String workspace_bucket = read_string(workspace_bucket_output)
String workspace_id = read_string(workspace_id_output)
Expand Down
48 changes: 34 additions & 14 deletions scripts/variantstore/wdl/extract/build_docker.sh
Original file line number Diff line number Diff line change
@@ -1,29 +1,43 @@
set -o errexit -o nounset -o pipefail

usage() {
echo "

USAGE: ./build_docker.sh

Generate a tag suitable for publication of a Variants Docker image.
Tags will be of the form <ISO 8601 Date>-alpine-<short git hash>.
Build a Variants Docker image with an appropriate tag and push to GAR.
The repo name will be 'us-central1-docker.pkg.dev/broad-dsde-methods/gvs' and the image name will be 'variants'.
Tags will be of the form <ISO 8601 Date>-alpine-<Docker image ID>.

e.g. 2023-10-10-alpine-f000ba44
e.g. 2024-04-22-alpine-f000ba44
"
exit 1
}

TAG=$(python3 ./build_docker_tag.py $*)
if [[ $? -ne 0 ]]
if [[ $# -ne 0 ]]
then
usage
fi

BASE_REPO="broad-dsde-methods/variantstore"
REPO_WITH_TAG="${BASE_REPO}:${TAG}"
GCR_TAG="us.gcr.io/${REPO_WITH_TAG}"
# Write the full Docker image ID to a file. This will look something like:
# sha256:5286e46648c595295dcb58a4cc2ec0b8893c9f26d7d49393908e5ae6d4dea188
docker build . --iidfile idfile.txt
FULL_IMAGE_ID=$(cat idfile.txt)

# Take the slice of this full Docker image ID that corresponds with the output of `docker images`:
IMAGE_ID=${FULL_IMAGE_ID:7:12}

# The Variants Docker image is alpine-based.
IMAGE_TYPE="alpine"

docker build . -t "${REPO_WITH_TAG}"
# Build the image tag using the image type and Docker image ID:
TAG=$(python3 ./build_docker_tag.py --image-id "${IMAGE_ID}" --image-type "${IMAGE_TYPE}")

# Run unit tests before pushing to GCR.
BASE_REPO="broad-dsde-methods/gvs"
REPO_WITH_TAG="${BASE_REPO}/variants:${TAG}"
docker tag "${IMAGE_ID}" "${REPO_WITH_TAG}"

# Run unit tests before pushing.
set +o errexit
fail=0
for test in test_*.py
Expand All @@ -36,13 +50,19 @@ do
done

if [ $fail -ne 0 ]; then
echo "One or more unit test has failed, exiting."
echo "One or more unit tests have failed, exiting."
exit $fail
fi

set -o errexit

docker tag "${REPO_WITH_TAG}" "${GCR_TAG}"
docker push "${GCR_TAG}"
GAR_TAG="us-central1-docker.pkg.dev/${REPO_WITH_TAG}"
docker tag "${REPO_WITH_TAG}" "${GAR_TAG}"

# Docker must be configured for GAR before pushes will work:
# gcloud auth configure-docker us-central1-docker.pkg.dev
docker push "${GAR_TAG}"

echo "Docker image pushed to \"${GAR_TAG}\""

echo "docker image pushed to \"${GCR_TAG}\""
rm idfile.txt
12 changes: 3 additions & 9 deletions scripts/variantstore/wdl/extract/build_docker_tag.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,13 @@ def build_tag(args):
current_date = datetime.now()
date_string = current_date.strftime('%Y-%m-%d')

if not args.dummy_testing_hash:
proc = run(["git", "rev-parse", "--short", "HEAD"], stdout=subprocess.PIPE)
git_hash = proc.stdout.rstrip().decode('utf-8')
else:
# Do not actually try to run git during testing, the .git directory is not mounted into the container.
git_hash = args.dummy_testing_hash
return f"{date_string}-alpine-{git_hash}"
return f"{date_string}-{args.image_type}-{args.image_id}"


def build_argument_parser():
parser = argparse.ArgumentParser(allow_abbrev=False, description='Build a tag for Variants Docker image')
parser.add_argument('-d', '--dummy-testing-hash', default=None,
help='Dummy short git hash to return during testing')
parser.add_argument('-i', '--image-id', required=True, help='Docker image ID')
parser.add_argument('-t', '--image-type', required=True, help='Docker image type, should be "alpine" or "slim".')
return parser


Expand Down
7 changes: 4 additions & 3 deletions scripts/variantstore/wdl/extract/plink_docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
FROM google/cloud-sdk:448.0.0
FROM google/cloud-sdk:472.0.0-slim

ADD https://s3.amazonaws.com/plink2-assets/alpha4/plink2_linux_avx2_20230922.zip plink2.zip
# Use the stable URL to plink2 from the pgen-jni project.
ADD https://github.com/broadinstitute/pgen-jni/raw/de500ad710ce70a8171fd3d0806857a1f36d8a2e/plink2_assets/plink2_linux_x86_64_20240318.zip plink2.zip

RUN apt-get update && \
apt-get upgrade -y && \
Expand All @@ -10,4 +11,4 @@ RUN apt-get update && \
RUN unzip plink2.zip -d /usr/local/bin &&\
rm plink2.zip

ENV PATH /usr/local/bin/plink2:$PATH
ENV PATH /usr/local/bin/plink2:$PATH
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#!/usr/bin/env bash
# This script relies on bash-only features ([[ ]], substring expansion, pipefail), so request bash explicitly.

# Strict mode: abort on any failing command, any unset variable, or any failed pipeline stage.
set -o errexit -o nounset -o pipefail

# Print the usage message to stderr and exit with status 1.
# Invoked when the script is given arguments, since it accepts none.
usage() {
    echo "

USAGE: ./build_plink_docker.sh

Build a PLINK 2 (https://www.cog-genomics.org/plink/2.0/) Docker image with an appropriate tag and push to GAR.
The repo name will be 'us-central1-docker.pkg.dev/broad-dsde-methods/gvs' and the image name will be 'plink2'.
Tags will be of the form <ISO 8601 Date>-slim-<Docker image ID>.

e.g. 2024-04-22-slim-f000ba44cafe
" >&2
    exit 1
}

# This script takes no arguments; anything on the command line is treated as a usage error.
if [[ $# -ne 0 ]]
then
    usage
fi

# Remove the image ID file on every exit path, so a failed build, tag or push (which aborts
# the script under errexit) does not leave a stale idfile.txt behind.
trap 'rm -f idfile.txt' EXIT

# Write the full Docker image ID to a file. This will look something like:
# sha256:5286e46648c595295dcb58a4cc2ec0b8893c9f26d7d49393908e5ae6d4dea188
docker build . --iidfile idfile.txt
FULL_IMAGE_ID=$(cat idfile.txt)

# Fail loudly instead of slicing arbitrary characters if the ID lacks the expected digest prefix.
if [[ "${FULL_IMAGE_ID}" != sha256:* ]]
then
    echo "Unexpected Docker image ID format: \"${FULL_IMAGE_ID}\"" >&2
    exit 1
fi

# Skip the 7-character 'sha256:' prefix and keep the next 12 characters as the short image ID.
IMAGE_ID=${FULL_IMAGE_ID:7:12}

# Image type component of the tag.
IMAGE_TYPE="slim"

# The tag script lives in the parent directory and composes <date>-<image type>-<image ID>.
TAG=$(python3 ../build_docker_tag.py --image-id "${IMAGE_ID}" --image-type "${IMAGE_TYPE}")

BASE_REPO="broad-dsde-methods/gvs"
REPO_WITH_TAG="${BASE_REPO}/plink2:${TAG}"
docker tag "${IMAGE_ID}" "${REPO_WITH_TAG}"

# Add the fully qualified Google Artifact Registry name that will actually be pushed.
GAR_TAG="us-central1-docker.pkg.dev/${REPO_WITH_TAG}"
docker tag "${REPO_WITH_TAG}" "${GAR_TAG}"

# Docker must be configured for GAR before pushes will work:
# gcloud auth configure-docker us-central1-docker.pkg.dev
docker push "${GAR_TAG}"

echo "Docker image pushed to \"${GAR_TAG}\""
2 changes: 1 addition & 1 deletion scripts/variantstore/wdl/extract/test_build_docker_tag.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,6 @@ def setUp(self) -> None:
def test_tag(self):
tag_re = re.compile(r"^20\d{2}-\d{2}-\d{2}-alpine-f00ba4ba5$")

args = self.argument_parser.parse_args(['--dummy-testing-hash', 'f00ba4ba5'])
args = self.argument_parser.parse_args(['--image-id', 'f00ba4ba5', '--image-type', 'alpine'])
tag = build_tag(args)
self.assertTrue(tag_re.match(tag))
Loading