Docker builds #24

Workflow file for this run

.github/workflows/docker-build.yml at 4175e1a

	name: Docker builds

	# Events that start this workflow.
	on:
	  push:
	    branches: [master, "release/*"]
	  pull_request:
	    branches: [master, "release/*"]
	    # added `ready_for_review` since draft is skipped
	    types: [opened, reopened, ready_for_review, synchronize]
	    # Only pull requests that touch at least one of these paths trigger a run.
	    # Patterns starting with `!` exclude files matched by an earlier pattern.
	    paths:
	      - ".actions/*"
	      - ".github/workflows/docker-build.yml"
	      - "dockers/**"
	      - "requirements/*.txt"
	      - "requirements/pytorch/**"
	      - "requirements/fabric/**"
	      - "setup.py"
	      - "!requirements/*/docs.txt"
	      # ignore Markdown files in the repository root and in any sub-directory
	      - "!*.md"
	      - "!**/*.md"
	  schedule:
	    - cron: "0 0 * * *" # daily at 00:00 UTC
	  release:
	    types: [published]
	  workflow_dispatch: {}

	# Runs are grouped by workflow, ref, PR head branch and triggering event.
	# For pull_request events a newer run cancels the in-progress run of the same group;
	# for every other event the in-progress run is left to finish.
	concurrency:
	  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref }}-${{ github.event_name }}
	  cancel-in-progress: ${{ github.event_name == 'pull_request' }}

	# Workflow-wide switches. Environment values are strings, so steps compare them against 'true'.
	env:
	  # 'true' on scheduled and manually dispatched runs
	  PUSH_NIGHTLY: ${{ github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}
	  # 'true' when the run was triggered for a tag ref or by a release event
	  PUSH_RELEASE: ${{ startsWith(github.ref, 'refs/tags/') || github.event_name == 'release' }}

	jobs:
	  # Builds the release images from dockers/release/Dockerfile, one per matrix entry.
	  build-pl:
	    # the images generated by this job are not used anywhere in this repository. they are just meant to be available
	    # for users
	    if: github.event.pull_request.draft == false
	    runs-on: ubuntu-latest
	    strategy:
	      fail-fast: false
	      matrix:
	        include:
	          # We only release one docker image per PyTorch version.
	          # Every entry here needs a matching base image entry in the `build-cuda` matrix below.
	          # NOTE(review): PyTorch 1.13 is listed with two CUDA versions - confirm both are intended.
	          - { python_version: "3.9", pytorch_version: "1.13", cuda_version: "11.8.0" }
	          - { python_version: "3.9", pytorch_version: "1.13", cuda_version: "12.0.1" }
	          - { python_version: "3.10", pytorch_version: "2.0", cuda_version: "11.8.0" }
	          - { python_version: "3.10", pytorch_version: "2.1", cuda_version: "12.1.0" }
	    steps:
	      - uses: actions/checkout@v4
	        with:
	          submodules: true
	      - uses: docker/setup-buildx-action@v3
	      - uses: docker/login-action@v3
	        if: env.PUSH_RELEASE == 'true' && github.repository_owner == 'Lightning-AI'
	        with:
	          username: ${{ secrets.DOCKER_USERNAME }}
	          password: ${{ secrets.DOCKER_PASSWORD }}

	      - name: Get release version
	        if: github.event_name == 'release'
	        # For workflows triggered by release, `GITHUB_REF` is the release tag created.
	        # `${GITHUB_REF##*/}` keeps only the part after the last `/`.
	        run: echo "RELEASE_VERSION=${GITHUB_REF##*/}" >> "$GITHUB_ENV"
	      - name: Set tags
	        # Exports DOCKER_TAGS (comma-separated, fully qualified image tags) for the build step below.
	        run: |
	          import os

	          repo = "pytorchlightning/pytorch_lightning"
	          # only set by the "Get release version" step, i.e. on release events
	          ver = os.getenv('RELEASE_VERSION')
	          py_ver = "${{ matrix.python_version }}"
	          pt_ver = "${{ matrix.pytorch_version }}"
	          cuda_ver = "${{ matrix.cuda_version }}"
	          tags = [f"latest-py{py_ver}-torch{pt_ver}-cuda{cuda_ver}"]
	          if ver:
	              tags += [f"{ver}-py{py_ver}-torch{pt_ver}-cuda{cuda_ver}"]
	          # NOTE(review): this check is assumed to be independent of `if ver:` - confirm against the repository.
	          if py_ver == '3.10' and pt_ver == '2.1' and cuda_ver == '12.1.0':
	              tags += ["latest"]

	          tags = [f"{repo}:{tag}" for tag in tags]
	          with open(os.getenv('GITHUB_ENV'), "a") as gh_env:
	              # terminate the entry with a newline so anything appended later starts on its own line
	              gh_env.write("DOCKER_TAGS=" + ",".join(tags) + "\n")
	        shell: python

	      - uses: docker/build-push-action@v5
	        with:
	          build-args: |
	            PYTHON_VERSION=${{ matrix.python_version }}
	            PYTORCH_VERSION=${{ matrix.pytorch_version }}
	            CUDA_VERSION=${{ matrix.cuda_version }}
	            LIGHTNING_VERSION=${{ env.RELEASE_VERSION }}
	          file: dockers/release/Dockerfile
	          # Push under the same condition as the login step above; all other runs only build.
	          push: ${{ env.PUSH_RELEASE == 'true' && github.repository_owner == 'Lightning-AI' }}
	          tags: ${{ env.DOCKER_TAGS }}
	        timeout-minutes: 35

	  # Builds the base CUDA images from dockers/base-cuda/Dockerfile, one per matrix entry.
	  build-cuda:
	    if: github.event.pull_request.draft == false
	    runs-on: ubuntu-latest
	    strategy:
	      fail-fast: false
	      matrix:
	        include:
	          # These are the base images for PL release docker images.
	          # This matrix must contain every entry of the `build-pl` matrix above; extra entries are allowed.
	          - { python_version: "3.9", pytorch_version: "1.13", cuda_version: "11.8.0" }
	          - { python_version: "3.9", pytorch_version: "1.13", cuda_version: "12.0.1" }
	          - { python_version: "3.10", pytorch_version: "2.0", cuda_version: "11.8.0" }
	          - { python_version: "3.10", pytorch_version: "2.1", cuda_version: "12.1.0" }
	          - { python_version: "3.10", pytorch_version: "2.2", cuda_version: "12.1.0" }
	          - { python_version: "3.11", pytorch_version: "2.1", cuda_version: "12.1.0" }
	          - { python_version: "3.11", pytorch_version: "2.2", cuda_version: "12.1.0" }
	          # - { python_version: "3.12", pytorch_version: "2.2", cuda_version: "12.1.0" } # todo: pending on `onnxruntime`
	    steps:
	      - uses: actions/checkout@v4
	      - uses: docker/setup-buildx-action@v3
	      - uses: docker/login-action@v3
	        if: env.PUSH_NIGHTLY == 'true' && github.repository_owner == 'Lightning-AI'
	        with:
	          username: ${{ secrets.DOCKER_USERNAME }}
	          password: ${{ secrets.DOCKER_PASSWORD }}
	      - uses: docker/build-push-action@v5
	        with:
	          build-args: |
	            PYTHON_VERSION=${{ matrix.python_version }}
	            PYTORCH_VERSION=${{ matrix.pytorch_version }}
	            CUDA_VERSION=${{ matrix.cuda_version }}
	          file: dockers/base-cuda/Dockerfile
	          # Push under the same condition as the login step above; all other runs only build.
	          push: ${{ env.PUSH_NIGHTLY == 'true' && github.repository_owner == 'Lightning-AI' }}
	          tags: "pytorchlightning/pytorch_lightning:base-cuda-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }}-cuda${{ matrix.cuda_version }}"
	        timeout-minutes: 95
	      # Report to Slack only when a nightly (scheduled or manually dispatched) run fails.
	      - uses: ravsamhq/notify-slack-action@v2
	        if: failure() && env.PUSH_NIGHTLY == 'true'
	        with:
	          status: ${{ job.status }}
	          token: ${{ secrets.GITHUB_TOKEN }}
	          notification_title: ${{ format('CUDA; {0} py{1} for {2}', runner.os, matrix.python_version, matrix.pytorch_version) }}
	          message_format: "{emoji} {workflow} {status_message}, see <{run_url}|detail>, cc: <@U01A5T7EY9M>" # akihironitta
	        env:
	          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}

	  # Builds the image from dockers/nvidia/Dockerfile without pushing it.
	  build-NGC:
	    if: github.event.pull_request.draft == false
	    # fixme: use larger machine or optimize image size
	    #   runs-on: ubuntu-latest-4-cores
	    #   then drop continue-on-error
	    runs-on: ubuntu-latest
	    steps:
	      - name: Checkout
	        uses: actions/checkout@v4
	      - name: Build Conda Docker
	        # build only (`push: false`); a failure here does not fail the job, see the fixme above
	        continue-on-error: true
	        uses: docker/build-push-action@v5
	        with:
	          file: dockers/nvidia/Dockerfile
	          push: false
	        timeout-minutes: 55

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Docker builds #24

Workflow file

Docker builds #24

Jobs

Run details

Workflow file for this run