From 32661a5cd83511f0f5d79cdf651dd2a5d96b25ea Mon Sep 17 00:00:00 2001 From: Morgan Taylor Date: Tue, 12 Mar 2024 09:28:01 -0400 Subject: [PATCH 1/3] beagle docker in development --- 3rd-party-tools/beagle/Dockerfile | 72 ++++++++++++++++++++ 3rd-party-tools/beagle/README.md | 35 ++++++++++ 3rd-party-tools/beagle/docker_build.sh | 76 ++++++++++++++++++++++ 3rd-party-tools/beagle/docker_versions.tsv | 2 + 4 files changed, 185 insertions(+) create mode 100644 3rd-party-tools/beagle/Dockerfile create mode 100644 3rd-party-tools/beagle/README.md create mode 100755 3rd-party-tools/beagle/docker_build.sh create mode 100644 3rd-party-tools/beagle/docker_versions.tsv diff --git a/3rd-party-tools/beagle/Dockerfile b/3rd-party-tools/beagle/Dockerfile new file mode 100644 index 00000000..421791e0 --- /dev/null +++ b/3rd-party-tools/beagle/Dockerfile @@ -0,0 +1,72 @@ +# Adding a platform tag to ensure that images built on ARM-based machines don't break pipelines +FROM --platform="linux/amd64" adoptopenjdk/openjdk8:alpine-slim + +ARG BEAGLE_VERSION=01Mar24.d36 \ + BREF3_VERSION=22Jul22.46e \ + BCFTOOLS_VERSION=1.10.2 + +ENV TERM=xterm-256color \ + TINI_VERSION=v0.19.0 + +LABEL MAINTAINER="Broad Institute DSDE " \ + BEAGLE_VERSION=${BEAGLE_VERSION} \ + BCFTOOLS_VERSION=${BCFTOOLS_VERSION} + +WORKDIR /usr/gitc + +# Install dependencies +RUN set -eux; \ + # apk add --no-cache \ + # bash \ + # curl \ + # findutils \ + # gcc \ + # jq \ + # unzip \ + # wget \ + # ; \ + apk add --no-cache \ + autoconf \ + automake \ + bash \ + bzip2-dev \ + curl \ + g++ \ + gcc \ + gsl-dev \ + make \ + musl-dev \ + perl \ + perl-dev \ + tini \ + wget \ + xz-dev \ + zlib-dev \ + ; \ +# Install BCFTools + wget https://github.com/samtools/bcftools/releases/download/${BCFTOOLS_VERSION}/bcftools-${BCFTOOLS_VERSION}.tar.bz2; \ + tar xf bcftools-${BCFTOOLS_VERSION}.tar.bz2; \ + cd bcftools-${BCFTOOLS_VERSION}; \ + \ + ./configure; \ + make; \ + make install; \ + \ + cd ../; \ + rm -r 
bcftools-${BCFTOOLS_VERSION}; \ + rm bcftools-${BCFTOOLS_VERSION}.tar.bz2 \ + ; +# Download Beagle jars +RUN \ + # beagle runs phasing and imputation + curl -L https://faculty.washington.edu/browning/beagle/beagle.${BEAGLE_VERSION}.jar > beagle.${BEAGLE_VERSION}.jar \ + ; \ + # bref3 converts a reference panel from vcf to the bref3 format that Beagle needs + curl -L https://faculty.washington.edu/browning/beagle/bref3.${BREF3_VERSION}.jar > bref3.${BREF3_VERSION}.jar \ + ; \ +# Install tini + wget https://github.com/krallin/tini/releases/download/$TINI_VERSION/tini -O /sbin/tini; \ + chmod +x /sbin/tini; + +# Set tini as default entrypoint +ENTRYPOINT ["/sbin/tini", "--" ] diff --git a/3rd-party-tools/beagle/README.md b/3rd-party-tools/beagle/README.md new file mode 100644 index 00000000..64f556fe --- /dev/null +++ b/3rd-party-tools/beagle/README.md @@ -0,0 +1,35 @@ +# Imputation Beagle + +## Quick reference + +Copy and paste to pull this image + +#### `us.gcr.io/broad-gotc-prod/imputation-beagle:0.0.1-01Mar24.d36-xxxx` + +- __What is this image:__ This image is a lightweight alpine-based image for running Beagle in the [ImputationBeagle pipeline](../../../../pipelines/broad/arrays/imputation_beagle/ImputationBeagle.wdl). +- __What is Beagle:__ Beagle is a software package for phasing genotypes and imputing ungenotyped markers. Beagle version 5.4 has improved memory and computational efficiency when analyzing large sequence data sets. See [here](https://faculty.washington.edu/browning/beagle/beagle.html) for more information. +- __How to see Beagle version used in image:__ Please see below. + +## Versioning + +The Imputation Beagle image uses the following convention for versioning: + +#### `us.gcr.io/broad-gotc-prod/samtools:--` + +We keep track of all past versions in [docker_versions](docker_versions.tsv) with the last image listed being the currently used version in WARP. 
+ +You can see more information about the image, including the tool versions, by running the following command: + +```bash +$ docker pull us.gcr.io/broad-gotc-prod/imputation-beagle:0.0.1-01Mar24.d36-xxxx +$ docker inspect us.gcr.io/broad-gotc-prod/imputation-beagle:0.0.1-01Mar24.d36-xxxx +``` + +## Usage + +### Display default menu + +```bash +$ docker run --rm -it \ + us.gcr.io/broad-gotc-prod/imputation-beagle:0.0.1-1.0.2-1663948783 /usr/gitc/beagle +``` \ No newline at end of file diff --git a/3rd-party-tools/beagle/docker_build.sh b/3rd-party-tools/beagle/docker_build.sh new file mode 100755 index 00000000..32491745 --- /dev/null +++ b/3rd-party-tools/beagle/docker_build.sh @@ -0,0 +1,76 @@ +#!/bin/bash +set -e + +# Update version when changes to Dockerfile are made +DOCKER_IMAGE_VERSION=0.0.1 +TIMESTAMP="wip-temp-20240301" # $(date +"%s") +DIR=$(cd $(dirname $0) && pwd) + +# Registries and tags +# GCR_URL="us.gcr.io/broad-gotc-prod/imputation-beagle" + +# GAR setup +GAR_REGION="us-central1" +GAR_PROJECT="morgan-fieldeng-gcp" +GAR_REPOSITORY="imputation-beagle-development" +GAR_IMAGE="imputation-beagle" +GAR_URL="${GAR_REGION}-docker.pkg.dev/${GAR_PROJECT}/${GAR_REPOSITORY}/${GAR_IMAGE}" + +# Beagle version +BEAGLE_VERSION="01Mar24.d36" + +# Necessary tools and help text +TOOLS=(docker gcloud) +HELP="$(basename "$0") [-h|--help] [-b|--beagle] [-t|--tools] -- script to build the Imputation Beagle image and push to GAR + +where: + -h|--help Show help text + -b|--beagle Version of Beagle to use (default: BEAGLE_VERSION=${BEAGLE_VERSION}) + -t|--tools Show tools needed to run script + " + +function main(){ + for t in "${TOOLS[@]}"; do which $t >/dev/null || ok=no; done + if [[ $ok == no ]]; then + echo "Missing one of the following tools: " + for t in "${TOOLS[@]}"; do echo "$t"; done + exit 1 + fi + + while [[ $# -gt 0 ]] + do + key="$1" + case $key in + -b|--beagle) + BEAGLE_VERSION="$2" + shift + shift + ;; + -h|--help) + echo "$HELP" + exit 0 + ;; + 
-t|--tools) + for t in "${TOOLS[@]}"; do echo $t; done + exit 0 + ;; + *) + shift + ;; + esac + done + + IMAGE_TAG="$DOCKER_IMAGE_VERSION-$BEAGLE_VERSION-$TIMESTAMP" + + echo "building and pushing GCR Image - $GAR_URL:$IMAGE_TAG" + docker build -t "$GAR_URL:$IMAGE_TAG" \ + --build-arg BEAGLE_VERSION="$BEAGLE_VERSION" \ + $DIR + # --no-cache $DIR\ + docker push "$GAR_URL:$IMAGE_TAG" + + echo -e "$GAR_URL:$IMAGE_TAG" >> "$DIR/docker_versions.tsv" + echo "done" +} + +main "$@" \ No newline at end of file diff --git a/3rd-party-tools/beagle/docker_versions.tsv b/3rd-party-tools/beagle/docker_versions.tsv new file mode 100644 index 00000000..b6c4561b --- /dev/null +++ b/3rd-party-tools/beagle/docker_versions.tsv @@ -0,0 +1,2 @@ +us-central1-docker.pkg.dev/morgan-fieldeng-gcp/imputation-beagle-development/imputation-beagle:0.0.1-22Jul22.46e-wip-temp-20240227 +us-central1-docker.pkg.dev/morgan-fieldeng-gcp/imputation-beagle-development/imputation-beagle:0.0.1-01Mar24.d36-wip-temp-20240301 From 00c769a2fa2fdc75e298bde9078cbce6bd12ba70 Mon Sep 17 00:00:00 2001 From: Morgan Taylor Date: Fri, 15 Mar 2024 13:51:41 -0400 Subject: [PATCH 2/3] updates --- 3rd-party-tools/beagle/README.md | 10 +++++----- 3rd-party-tools/beagle/docker_build.sh | 2 ++ 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/3rd-party-tools/beagle/README.md b/3rd-party-tools/beagle/README.md index 64f556fe..a0bdc7b8 100644 --- a/3rd-party-tools/beagle/README.md +++ b/3rd-party-tools/beagle/README.md @@ -4,7 +4,7 @@ Copy and paste to pull this image -#### `us.gcr.io/broad-gotc-prod/imputation-beagle:0.0.1-01Mar24.d36-xxxx` +#### `us-central1-docker.pkg.dev/morgan-fieldeng-gcp/imputation-beagle-development/imputation-beagle:0.0.1-01Mar24.d36-wip-temp-20240301` - __What is this image:__ This image is a lightweight alpine-based image for running Beagle in the [ImputationBeagle pipeline](../../../../pipelines/broad/arrays/imputation_beagle/ImputationBeagle.wdl). 
- __What is Beagle:__ Beagle is a software package for phasing genotypes and imputing ungenotyped markers. Beagle version 5.4 has improved memory and computational efficiency when analyzing large sequence data sets. See [here](https://faculty.washington.edu/browning/beagle/beagle.html) for more information. @@ -14,15 +14,15 @@ Copy and paste to pull this image The Imputation Beagle image uses the following convention for versioning: -#### `us.gcr.io/broad-gotc-prod/samtools:--` +#### `us-central1-docker.pkg.dev/morgan-fieldeng-gcp/imputation-beagle-development/imputation-beagle:<image-version>-<beagle-version>-<unix-timestamp>` We keep track of all past versions in [docker_versions](docker_versions.tsv) with the last image listed being the currently used version in WARP. You can see more information about the image, including the tool versions, by running the following command: ```bash -$ docker pull us.gcr.io/broad-gotc-prod/imputation-beagle:0.0.1-01Mar24.d36-xxxx -$ docker inspect us.gcr.io/broad-gotc-prod/imputation-beagle:0.0.1-01Mar24.d36-xxxx +$ docker pull us-central1-docker.pkg.dev/morgan-fieldeng-gcp/imputation-beagle-development/imputation-beagle:0.0.1-01Mar24.d36-wip-temp-20240301 +$ docker inspect us-central1-docker.pkg.dev/morgan-fieldeng-gcp/imputation-beagle-development/imputation-beagle:0.0.1-01Mar24.d36-wip-temp-20240301 ``` ## Usage @@ -31,5 +31,5 @@ $ docker inspect us.gcr.io/broad-gotc-prod/imputation-beagle:0.0.1-01Mar24.d36-x ```bash $ docker run --rm -it \ - us.gcr.io/broad-gotc-prod/imputation-beagle:0.0.1-1.0.2-1663948783 /usr/gitc/beagle + us-central1-docker.pkg.dev/morgan-fieldeng-gcp/imputation-beagle-development/imputation-beagle:0.0.1-01Mar24.d36-wip-temp-20240301 java -jar /usr/gitc/beagle.01Mar24.d36.jar ``` \ No newline at end of file diff --git a/3rd-party-tools/beagle/docker_build.sh b/3rd-party-tools/beagle/docker_build.sh index 32491745..1d9ac7c1 100755 --- a/3rd-party-tools/beagle/docker_build.sh +++ b/3rd-party-tools/beagle/docker_build.sh @@ -63,6 +63,8 @@ function main(){ 
IMAGE_TAG="$DOCKER_IMAGE_VERSION-$BEAGLE_VERSION-$TIMESTAMP" echo "building and pushing GCR Image - $GAR_URL:$IMAGE_TAG" + + # TODO: add `--squash` when ready to productionize. https://docs.docker.com/reference/cli/docker/image/build/#squash docker build -t "$GAR_URL:$IMAGE_TAG" \ --build-arg BEAGLE_VERSION="$BEAGLE_VERSION" \ $DIR From 73c6cbda9aa743d542f855482e86410407bb469c Mon Sep 17 00:00:00 2001 From: Morgan Taylor Date: Wed, 21 Aug 2024 13:56:19 -0400 Subject: [PATCH 3/3] updates --- 3rd-party-tools/beagle/Dockerfile | 12 +----------- 3rd-party-tools/beagle/docker_build.sh | 2 +- 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/3rd-party-tools/beagle/Dockerfile b/3rd-party-tools/beagle/Dockerfile index 421791e0..f2c65e23 100644 --- a/3rd-party-tools/beagle/Dockerfile +++ b/3rd-party-tools/beagle/Dockerfile @@ -16,15 +16,6 @@ WORKDIR /usr/gitc # Install dependencies RUN set -eux; \ - # apk add --no-cache \ - # bash \ - # curl \ - # findutils \ - # gcc \ - # jq \ - # unzip \ - # wget \ - # ; \ apk add --no-cache \ autoconf \ automake \ @@ -55,9 +46,8 @@ RUN set -eux; \ cd ../; \ rm -r bcftools-${BCFTOOLS_VERSION}; \ rm bcftools-${BCFTOOLS_VERSION}.tar.bz2 \ - ; + ; \ # Download Beagle jars -RUN \ # beagle runs phasing and imputation curl -L https://faculty.washington.edu/browning/beagle/beagle.${BEAGLE_VERSION}.jar > beagle.${BEAGLE_VERSION}.jar \ ; \ diff --git a/3rd-party-tools/beagle/docker_build.sh b/3rd-party-tools/beagle/docker_build.sh index 1d9ac7c1..dead79b7 100755 --- a/3rd-party-tools/beagle/docker_build.sh +++ b/3rd-party-tools/beagle/docker_build.sh @@ -3,7 +3,7 @@ set -e # Update version when changes to Dockerfile are made DOCKER_IMAGE_VERSION=0.0.1 -TIMESTAMP="wip-temp-20240301" # $(date +"%s") +TIMESTAMP=$(date +"%s") DIR=$(cd $(dirname $0) && pwd) # Registries and tags