GPU tests in CI (#70)
Signed-off-by: Igor Gitman <[email protected]>
Kipok authored Jul 25, 2024
1 parent ae0a592 commit 396b838
Showing 10 changed files with 144 additions and 77 deletions.
39 changes: 37 additions & 2 deletions .github/workflows/tests.yml
@@ -3,13 +3,18 @@ name: Unit tests
on:
pull_request:
branches: [ "main" ]
types: [ "labeled" ]

# Allows you to run this workflow manually from the Actions tab
workflow_dispatch:

permissions:
contents: read

concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true

jobs:
unit-tests:
runs-on: ubuntu-latest
@@ -34,7 +39,7 @@ jobs:
export NEMO_SKILLS_SANDBOX_HOST="$(docker inspect -f '{{range.NetworkSettings.Networks}}{{.IPAddress}}{{end}}' `docker ps -a | grep local-sandbox | awk '{print $1}'`)"
echo $NEMO_SKILLS_SANDBOX_HOST
set -o pipefail # this will make sure next line returns non-0 exit code if tests fail
python -m pytest tests/ --junitxml=pytest.xml --cov-report=term-missing:skip-covered --cov=nemo_skills --cov=pipeline --durations=30 -rs -vvv | tee pytest-coverage.txt
python -m pytest tests/ -m "not gpu" --junitxml=pytest.xml --cov-report=term-missing:skip-covered --cov=nemo_skills --cov=pipeline --durations=30 -rs -vvv | tee pytest-coverage.txt
- name: Check help message
run: |
NEMO_SKILLS_CONFIG=cluster_configs/slurm.yaml python tests/check_help.py --all
@@ -60,4 +65,34 @@ jobs:
NEMO_SKILLS_CONFIG=cluster_configs/slurm.yaml python tests/check_help.py
- name: Check datasets preparation
run: |
NEMO_SKILLS_CONFIG=cluster_configs/slurm.yaml pytest tests/test_datasets.py
NEMO_SKILLS_CONFIG=cluster_configs/slurm.yaml pytest tests/test_datasets.py
gpu-tests:
runs-on: self-hosted-nemo-skills
if: ${{ github.event.label.name == 'run GPU tests' }}
steps:
- uses: actions/checkout@v3
with:
path: ${{ github.run_id }}
- name: Set up Python 3.10
uses: actions/setup-python@v3
with:
python-version: "3.10"
- name: Install dependencies
run: |
cd ${{ github.run_id }}
python -m pip install --upgrade pip
pip install .
pip install -r datasets/requirements.txt
pip install pytest pytest-cov pytest-timeout
python datasets/prepare.py
- name: Run GPU tests
timeout-minutes: 120
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
run: |
cd ${{ github.run_id }}
nvidia-smi
export NEMO_SKILLS_CONFIG=cluster_configs/local.yaml
set -o pipefail # this will make sure next line returns non-0 exit code if tests fail
./tests/gpu-tests/run.sh /mnt/datadrive/nemo-skills-data/Meta-Llama-3-8B /mnt/datadrive/nemo-skills-data/Meta-Llama-3-8B-Instruct
2 changes: 1 addition & 1 deletion cluster_configs/local.yaml
@@ -16,7 +16,7 @@ cluster: local

containers:
tensorrt_llm: igitman/nemo-skills-trtllm:0.3.0
vllm: igitman/nemo-skills-vllm:0.3.0
vllm: igitman/nemo-skills-vllm:0.3.1
nemo: igitman/nemo-skills-sft:0.3.0
# sandbox is always re-built locally

18 changes: 3 additions & 15 deletions dockerfiles/Dockerfile.vllm
@@ -12,36 +12,24 @@
# See the License for the specific language governing permissions and
# limitations under the License.

ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:23.10-py3
ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:24.02-py3

# build an image that includes only the nemo dependencies, ensures that dependencies
# are included first for optimal caching, and useful for building a development
# image (by specifying build target as `nemo-deps`)
FROM ${BASE_IMAGE} as vllm-deps

# Ensure apt-get won't prompt for selecting options
ENV DEBIAN_FRONTEND=noninteractive
# libavdevice-dev required for latest torchaudio
RUN apt-get update && \
apt-get upgrade -y && \
rm -rf /var/lib/apt/lists/*

# install vllm dependencies
WORKDIR /workspace/

# Hack to force vllm to build even though logic for the check is flawed in their setup.py
ENV TORCH_CUDA_ARCH_LIST="8.0 8.6 9.0+PTX"

# v0.4.2
ARG VLLM_HASH=c7f2cf2b7f67bce5842fedfdba508440fe257375
ARG VLLM_HASH=5448f67635570cee6fc23c7cd166e9d8f7595009

RUN git clone https://github.com/vllm-project/vllm.git && \
cd vllm && \
git checkout ${VLLM_HASH} && \
pip3 install -e . && \
pip install -e . && \
cd ..


# Install requirements
RUN pip install openai>=1.3 ray pydantic flufl.lock fire text_dedup wandb hydra-core geventhttpclient && \
pip install rouge_score outlines sentencepiece transformers>=4.40 sentence-transformers>=2.2.2
1 change: 0 additions & 1 deletion nemo_skills/inference/server/serve_vllm.sh
@@ -94,7 +94,6 @@ python -m vllm.entrypoints.openai.api_server \
--port=${SERVER_PORT} \
--served-model-name "${MODEL_NAME}" \
--tensor-parallel-size=${NUM_GPUS} \
--max-num-seqs=1024 \
--enforce-eager \
--disable-log-requests \
$VLLM_GPU_MEMORY $QUANTIZATION $MAX_SEQUENCE_LENGTH
3 changes: 1 addition & 2 deletions pipeline/summarize_results.py
@@ -93,8 +93,7 @@
evaluator=evaluator,
aggregation_mode="best",
)

except subprocess.CalledProcessError as e:
except:
print(f"Error running compute_metrics.py for {benchmark}: {e}")

lines_to_write = []
7 changes: 6 additions & 1 deletion pyproject.toml
@@ -20,4 +20,9 @@ py_version = 310 # python 3.10 as a target version

[tool.black]
line_length = 119
skip_string_normalization = true
skip_string_normalization = true

[tool.pytest.ini_options]
markers = [
"gpu: tests that require a GPU to run",
]
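For reference, the `gpu` marker registered above is what the CPU-only workflow now deselects with `pytest tests/ -m "not gpu"`, while the self-hosted job runs the marked tests directly. A minimal sketch of the pattern (the test name below is illustrative; the environment-variable skip mirrors the existing conversion tests):

import os

import pytest


@pytest.mark.gpu  # deselected in the CPU-only job via `pytest tests/ -m "not gpu"`
def test_generation_on_gpu():
    # GPU tests skip themselves gracefully when the required model path is not provided
    model_path = os.getenv('NEMO_SKILLS_TEST_HF_MODEL')
    if not model_path:
        pytest.skip("Define NEMO_SKILLS_TEST_HF_MODEL to run this test")
    # ... GPU-dependent work would go here ...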
23 changes: 14 additions & 9 deletions tests/gpu-tests/run.sh
@@ -1,6 +1,6 @@
# will run all tests starting from only a HF checkpoint. Assumes 2 gpus on the machine
# will run all tests starting from only a HF checkpoint. Only requires 1 GPU.
# also need to define HF_TOKEN for some of the tests
# example: HF_TOKEN=<> ./tests/gpu-tests/run.sh /mnt/datadrive/models/Meta-Llama-3-8B
# example: HF_TOKEN=<> ./tests/gpu-tests/run.sh /mnt/datadrive/models/Meta-Llama-3-8B /mnt/datadrive/models/Meta-Llama-3-8B-Instruct
set -e

if [ $# -eq 0 ] ; then
@@ -13,18 +13,23 @@ export NEMO_SKILLS_TEST_OUTPUT=/tmp/nemo_skills_test_output
mkdir -p $NEMO_SKILLS_TEST_OUTPUT

# first running the conversion tests
pytest tests/gpu-tests/test_conversion.py -k test_hf_trtllm_conversion -s -x
pytest tests/gpu-tests/test_conversion.py -k test_hf_trtllm_conversion -s
export NEMO_SKILLS_TEST_TRTLLM_MODEL=$NEMO_SKILLS_TEST_OUTPUT/trtllm-model
pytest tests/gpu-tests/test_conversion.py -k test_hf_nemo_conversion -s -x
pytest tests/gpu-tests/test_conversion.py -k test_hf_nemo_conversion -s
export NEMO_SKILLS_TEST_NEMO_MODEL=$NEMO_SKILLS_TEST_OUTPUT/model.nemo
pytest tests/gpu-tests/test_conversion.py -k test_nemo_hf_conversion -s -x
pytest tests/gpu-tests/test_conversion.py -k test_nemo_hf_conversion -s
# using the back-converted model to check that it's reasonable
export NEMO_SKILLS_TEST_HF_MODEL=$NEMO_SKILLS_TEST_OUTPUT/hf-model

export LLAMA3_8B_BASE_TRTLLM=$NEMO_SKILLS_TEST_TRTLLM_MODEL
export LLAMA3_8B_BASE_NEMO=$NEMO_SKILLS_TEST_NEMO_MODEL
export LLAMA3_8B_BASE_HF=$NEMO_SKILLS_TEST_HF_MODEL
export LLAMA3_8B_INSTRUCT_HF=$2

# then running the rest of the tests
pytest tests/gpu-tests/test_generation.py -s -x
pytest tests/gpu-tests/test_generation.py -s

# # for sft we are using the tiny random llama model to run much faster
# for sft we are using the tiny random llama model to run much faster
python pipeline/launcher.py \
--cmd "HF_TOKEN=$HF_TOKEN python /code/tests/gpu-tests/make_tiny_llama.py" \
--tasks_per_node 1 \
@@ -34,10 +39,10 @@ python pipeline/launcher.py \

# converting the model through test
export NEMO_SKILLS_TEST_HF_MODEL=$NEMO_SKILLS_TEST_OUTPUT/tiny-llama
pytest tests/gpu-tests/test_conversion.py -k test_hf_nemo_conversion -s -x
pytest tests/gpu-tests/test_conversion.py -k test_hf_nemo_conversion -s
# untarring model which is required for checkpoint averaging
mkdir -p $NEMO_SKILLS_TEST_OUTPUT/untarred_nemo
tar xvf $NEMO_SKILLS_TEST_OUTPUT/model.nemo -C $NEMO_SKILLS_TEST_OUTPUT/untarred_nemo
export NEMO_SKILLS_TEST_NEMO_MODEL=$NEMO_SKILLS_TEST_OUTPUT/untarred_nemo
# running finetuning
pytest tests/gpu-tests/test_finetuning.py -s -x
pytest tests/gpu-tests/test_finetuning.py -s
9 changes: 7 additions & 2 deletions tests/gpu-tests/test_conversion.py
@@ -25,6 +25,7 @@
from launcher import CLUSTER_CONFIG, NEMO_SKILLS_CODE, launch_job


@pytest.mark.gpu
def test_hf_trtllm_conversion():
model_path = os.getenv('NEMO_SKILLS_TEST_HF_MODEL')
if not model_path:
@@ -36,7 +37,7 @@ def test_hf_trtllm_conversion():
--model_dir /model \
--output_dir /tmp/trtllm \
--dtype float16 \
--tp_size 2 \
--tp_size 1 \
&& trtllm-build \
--checkpoint_dir /tmp/trtllm \
--output_dir /output/trtllm-model \
@@ -62,6 +63,7 @@
)


@pytest.mark.gpu
def test_hf_nemo_conversion():
model_path = os.getenv('NEMO_SKILLS_TEST_HF_MODEL')
if not model_path:
@@ -87,13 +89,16 @@
)


@pytest.mark.gpu
def test_nemo_hf_conversion():
model_path = os.getenv('NEMO_SKILLS_TEST_NEMO_MODEL')
if not model_path:
pytest.skip("Define NEMO_SKILLS_TEST_NEMO_MODEL to run this test")
output_path = os.getenv('NEMO_SKILLS_TEST_OUTPUT', '/tmp')

cmd = f"""cd /code && \
# there is a bug in transformers related to slurm, so unsetting the vars
# TODO: remove this once the bug is fixed
cmd = f"""cd /code && unset SLURM_PROCID && unset SLURM_LOCALID && \
HF_TOKEN={os.environ['HF_TOKEN']} python nemo_skills/conversion/nemo_to_hf.py \
--in-path /model \
--out-path /output/hf-model \
5 changes: 3 additions & 2 deletions tests/gpu-tests/test_finetuning.py
@@ -28,6 +28,7 @@
from nemo_skills.evaluation.metrics import MathEval, compute_metrics


@pytest.mark.gpu
def test_sft_pipeline():
model_path = os.getenv('NEMO_SKILLS_TEST_NEMO_MODEL')
if not model_path:
@@ -42,15 +43,15 @@ def test_sft_pipeline():
--expname test \
--nemo_model {model_path} \
--num_nodes 1 \
--num_gpus 2 \
--num_gpus 1 \
--disable_wandb \
--extra_eval_args "+prompt=openmathinstruct/sft ++max_samples=4 --benchmarks gsm8k:1 math:0 --num_jobs 1 --num_gpus 1" \
++model.data.train_ds.file_path=/data/gsm8k/validation-sft.jsonl \
++trainer.sft.max_steps=15 \
++trainer.sft.val_check_interval=10 \
++trainer.sft.limit_val_batches=2 \
++model.data.train_ds.global_batch_size=4 \
++model.tensor_model_parallel_size=2 \
++model.tensor_model_parallel_size=1 \
++model.pipeline_model_parallel_size=1 \
++model.optim.lr=1e-6 \
"""