Skip to content

Commit

Permalink
chore(test): Add E2E tests for Kubeflow Trainer (#2470)
Browse files Browse the repository at this point in the history
* Add e2e tests for Kubeflow Trainer

Signed-off-by: Andrey Velichkevich <[email protected]>

* Add timeout for papermill

Signed-off-by: Andrey Velichkevich <[email protected]>

* Add output as part of make command

Signed-off-by: Andrey Velichkevich <[email protected]>

* Add k8s version to setup cluster

Signed-off-by: Andrey Velichkevich <[email protected]>

* Fix Kind k8s version

Signed-off-by: Andrey Velichkevich <[email protected]>

* Fix 1.29 version

Signed-off-by: Andrey Velichkevich <[email protected]>

* Create script to run Notebook

Signed-off-by: Andrey Velichkevich <[email protected]>

* Download dataset when local_rank=0

Signed-off-by: Andrey Velichkevich <[email protected]>

* Update test/e2e/e2e_test.go

Co-authored-by: Yuki Iwai <[email protected]>
Signed-off-by: Andrey Velichkevich <[email protected]>

* Refactor Go e2e tests

Signed-off-by: Andrey Velichkevich <[email protected]>

* Bump k8s to 1.29.14

Signed-off-by: Andrey Velichkevich <[email protected]>

* Install Kind from go mod

Signed-off-by: Andrey Velichkevich <[email protected]>

* Fix path for Kind package

Signed-off-by: Andrey Velichkevich <[email protected]>

* Fix Go e2e

Signed-off-by: Andrey Velichkevich <[email protected]>

* Reduce number of CPUs
Export Notebook as artifact

Signed-off-by: Andrey Velichkevich <[email protected]>

* Print logs due to flaky test

Signed-off-by: Andrey Velichkevich <[email protected]>

* Fix artifact path

Signed-off-by: Andrey Velichkevich <[email protected]>

* docker pull image

Signed-off-by: Andrey Velichkevich <[email protected]>

* Fix path

Signed-off-by: Andrey Velichkevich <[email protected]>

* Add k8s version to output name

Signed-off-by: Andrey Velichkevich <[email protected]>

* Remove install Kind cmd

Signed-off-by: Andrey Velichkevich <[email protected]>

---------

Signed-off-by: Andrey Velichkevich <[email protected]>
Co-authored-by: Yuki Iwai <[email protected]>
  • Loading branch information
andreyvelich and tenzen-y authored Mar 5, 2025
1 parent 3ec8f07 commit 9e78575
Show file tree
Hide file tree
Showing 18 changed files with 640 additions and 394 deletions.
99 changes: 0 additions & 99 deletions .github/workflows/template-e2e-test/action.yaml

This file was deleted.

48 changes: 43 additions & 5 deletions .github/workflows/test-e2e.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,23 +7,61 @@ on:
jobs:
e2e-test:
name: E2E Test
runs-on: ubuntu-latest
runs-on:
labels: ubuntu-latest-16-cores
env:
GOPATH: ${{ github.workspace }}/go
defaults:
run:
working-directory: ${{ env.GOPATH }}/src/github.com/kubeflow/trainer

strategy:
fail-fast: false
matrix:
kubernetes-version: ["1.29.3", "1.30.0", "1.31.0"]
# Kubernetes versions for e2e tests on Kind cluster.
kubernetes-version: ["1.29.14", "1.30.0", "1.31.0"]

steps:
- name: Check out code
uses: actions/checkout@v4
with:
path: ${{ env.GOPATH }}/src/github.com/kubeflow/trainer

- name: Setup Go
uses: actions/setup-go@v5
with:
go-version-file: ${{ env.GOPATH }}/src/github.com/kubeflow/trainer/go.mod

- name: Setup Python
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
python-version: 3.11

- name: Install dependencies
run: |
echo "TODO (andreyvelich): Implement E2E Tests"
# pip install -U './sdk'
echo "Install Papermill"
pip install papermill==2.6.0 jupyter==1.1.1 ipykernel==6.29.5
echo "Install Kubeflow SDK"
pip install ./sdk
# Create the Kind cluster for this matrix entry. K8S_VERSION passed on the
# command line overrides the Makefile's default Kubernetes version.
- name: Setup cluster
run: |
make test-e2e-setup-cluster K8S_VERSION=${{ matrix.kubernetes-version }}
# Run the Go e2e suite (the Makefile target executes `go test ./test/e2e/...`).
# It runs after the "Setup cluster" step of this job.
- name: Run e2e with Go
run: |
make test-e2e
# Execute the MNIST example Notebook with Papermill through the Makefile target.
# The output file name carries the Kubernetes version so that each matrix entry
# produces a distinct artifact. The timeout must be passed as PAPERMILL_TIMEOUT:
# that is the only timeout variable the test-e2e-notebook target forwards to
# ./hack/e2e-run-notebook.sh, so a bare TIMEOUT=... would be silently ignored.
- name: Run e2e test for example Notebooks.
  run: |
    make test-e2e-notebook NOTEBOOK_INPUT=./examples/pytorch/image-classification/mnist.ipynb NOTEBOOK_OUTPUT=./mnist_output_${{ matrix.kubernetes-version }}.ipynb PAPERMILL_TIMEOUT=900
# TODO (andreyvelich): Discuss how we can upload artifacts for multiple Notebooks.
# Publish the executed Notebook as a build artifact. `if: always()` makes the
# upload run even when an earlier step failed, so the Papermill output can be
# inspected. The artifact is kept for one day only.
- name: Upload notebook
uses: actions/upload-artifact@v4
if: always()
with:
name: mnist_output_${{ matrix.kubernetes-version }}.ipynb
path: ${{ env.GOPATH }}/src/github.com/kubeflow/trainer/mnist_output_${{ matrix.kubernetes-version }}.ipynb
retention-days: 1
2 changes: 1 addition & 1 deletion .github/workflows/test-go.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ jobs:
- name: Run Go integration tests
run: |
make test-integration ENVTEST_K8S_VERSION=${{ matrix.kubernetes-version }}
make test-integration K8S_VERSION=${{ matrix.kubernetes-version }}
- name: Coveralls report
uses: shogo82148/actions-goveralls@v1
Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ __debug_bin

# Jupyter Notebooks.
**/.ipynb_checkpoints
# The default output for Notebook after Papermill execution.
trainer_output.ipynb

# Python cache files
__pycache__/
Expand Down
30 changes: 27 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,16 @@ help: ## Display this help.

##@ Development

K8S_VERSION ?= 1.32.0

PROJECT_DIR := $(shell dirname $(abspath $(lastword $(MAKEFILE_LIST))))

# Tool Binaries
LOCALBIN ?= $(PROJECT_DIR)/bin

CONTROLLER_GEN ?= $(LOCALBIN)/controller-gen
ENVTEST ?= $(LOCALBIN)/setup-envtest

ENVTEST_K8S_VERSION ?= 1.32
KIND ?= $(LOCALBIN)/kind

# Instructions to download tools for development.
.PHONY: envtest
Expand All @@ -47,6 +49,10 @@ envtest: ## Download the setup-envtest binary if required.
controller-gen: ## Download the controller-gen binary if required.
GOBIN=$(LOCALBIN) go install sigs.k8s.io/controller-tools/cmd/[email protected]

# Install the Kind binary into $(LOCALBIN). The version is not hard-coded: it is
# looked up from the sigs.k8s.io/kind entry of the Go module graph, so the
# binary matches the module version the repository depends on.
.PHONY: kind
kind: ## Download Kind binary if required.
	GOBIN=$(LOCALBIN) go install \
		sigs.k8s.io/kind@$$(go list -m -f '{{.Version}}' sigs.k8s.io/kind)

# Download external CRDs for Go integration testing.
EXTERNAL_CRDS_DIR ?= $(PROJECT_DIR)/manifests/external-crds

Expand Down Expand Up @@ -106,8 +112,9 @@ test: ## Run Go unit test.

.PHONY: test-integration
test-integration: envtest jobset-operator-crd scheduler-plugins-crd ## Run Go integration test.
KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) -p path)" go test ./test/... -coverprofile cover.out
KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(K8S_VERSION) -p path)" go test ./test/integration/... -coverprofile cover.out

.PHONY: test-python
test-python: ## Run Python unit test.
export PYTHONPATH=$(PROJECT_DIR)
pip install pytest
Expand All @@ -118,9 +125,26 @@ test-python: ## Run Python unit test.
pytest ./pkg/initializer/model
pytest ./pkg/initializer/utils

# Run the Python integration tests for the initializer.
#
# PYTHONPATH is set on the pytest command line itself. By default Make runs
# every recipe line in its own shell, so a standalone `export PYTHONPATH=...`
# line would be lost before pytest starts (unless .ONESHELL is enabled elsewhere
# in this Makefile, in which case the inline form below is still correct).
.PHONY: test-python-integration
test-python-integration: ## Run Python integration test.
	pip install pytest
	pip install -r ./cmd/initializer/dataset/requirements.txt
	PYTHONPATH="$(PROJECT_DIR)" pytest ./test/integration/initializer

# Create the Kind cluster used by the e2e tests. The `kind` prerequisite
# installs the binary first; its path and the requested Kubernetes version are
# handed to the setup script through the environment.
.PHONY: test-e2e-setup-cluster
test-e2e-setup-cluster: kind ## Setup Kind cluster for e2e test.
	KIND=$(KIND) \
	K8S_VERSION=$(K8S_VERSION) \
	./hack/e2e-setup-cluster.sh

# Run every Go e2e test package under ./test/e2e.
# NOTE(review): this target has no prerequisites; presumably the cluster must
# already exist (e.g. created with test-e2e-setup-cluster) — confirm.
.PHONY: test-e2e
test-e2e: ## Run Go e2e test.
go test ./test/e2e/...

# Settings for executing an example Notebook with Papermill. Each value can be
# replaced on the make command line, e.g. `make test-e2e-notebook PAPERMILL_TIMEOUT=600`.
# Note that the timeout knob is PAPERMILL_TIMEOUT; a variable named TIMEOUT is
# not read by this target.
NOTEBOOK_INPUT := $(PROJECT_DIR)/examples/pytorch/image-classification/mnist.ipynb
NOTEBOOK_OUTPUT := $(PROJECT_DIR)/trainer_output.ipynb
PAPERMILL_TIMEOUT := 900

# The three settings are handed to the runner script through its environment.
.PHONY: test-e2e-notebook
test-e2e-notebook: ## Run Jupyter Notebook with Papermill.
	NOTEBOOK_INPUT=$(NOTEBOOK_INPUT) \
	NOTEBOOK_OUTPUT=$(NOTEBOOK_OUTPUT) \
	PAPERMILL_TIMEOUT=$(PAPERMILL_TIMEOUT) \
	./hack/e2e-run-notebook.sh
Loading

0 comments on commit 9e78575

Please sign in to comment.