Skip to content

Commit

Permalink
Merge branch 'main' into rocm_sparse_marlin
Browse files Browse the repository at this point in the history
  • Loading branch information
petrex authored Feb 25, 2025
2 parents aea9d81 + 98c4e2e commit f18043d
Show file tree
Hide file tree
Showing 223 changed files with 13,362 additions and 7,803 deletions.
1 change: 1 addition & 0 deletions .github/pytorch-probot.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ mergebot: True
ciflow_push_tags:
- ciflow/benchmark
- ciflow/tutorials
- ciflow/rocm
31 changes: 31 additions & 0 deletions .github/workflows/build-wheels_m1.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,34 @@ jobs:
runner-type: macos-m1-stable
smoke-test-script: test/smoke_test.py
trigger-event: ${{ github.event_name }}
notify:
runs-on: ubuntu-latest
name: Email notification
needs: [generate-matrix, build]
if: failure() && github.event_name == 'schedule'
steps:
- uses: dawidd6/action-send-mail@v4
with:
server_address: smtp.gmail.com
server_port: 465
username: torchao.notify
password: ${{ secrets.TORCHAO_NOTIFY_PASSWORD }}
from: torchao.notify@gmail.com
to: ${{ secrets.TORCHAO_NOTIFY_RECIPIENT }}
subject: Scheduled Build Failure for TorchAO
body: |
Build Failure Notification for TorchAO
A failure occurred in the Build M1 Wheels workflow.
Run Details:
- Workflow: ${{ github.workflow }}
- Run Type: ${{ github.event_name }}
- Repository: ${{ github.repository }}
- Branch/PR: ${{ github.ref }}
- Commit: ${{ github.sha }}
You can view the full run details here:
${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
Error Information:
${{ needs.generate-matrix.result == 'failure' && 'Matrix generation failed' || '' }}
${{ needs.build.result == 'failure' && 'Build job failed' || '' }}
This is an automated notification. Please check the GitHub Actions page for more details about the failure.
34 changes: 33 additions & 1 deletion .github/workflows/build_wheels_aarch64_linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ jobs:
test-infra-repository: pytorch/test-infra
test-infra-ref: main
with-cuda: disable

# please note: excluding 3.13t for aarch64 builds for now
python-versions: '["3.9", "3.10", "3.11", "3.12", "3.13"]'
build:
needs: generate-matrix
permissions:
Expand All @@ -53,3 +54,34 @@ jobs:
setup-miniconda: false
secrets:
PYPI_API_TOKEN: ${{ secrets.PYPI_API_TOKEN }}
notify:
runs-on: ubuntu-latest
name: Email notification
needs: [generate-matrix, build]
if: failure() && github.event_name == 'schedule'
steps:
- uses: dawidd6/action-send-mail@v4
with:
server_address: smtp.gmail.com
server_port: 465
username: torchao.notify
password: ${{ secrets.TORCHAO_NOTIFY_PASSWORD }}
from: torchao.notify@gmail.com
to: ${{ secrets.TORCHAO_NOTIFY_RECIPIENT }}
subject: Scheduled Build Failure for TorchAO
body: |
Build Failure Notification for TorchAO
A failure occurred in the Build AARCH64 Wheels workflow.
Run Details:
- Workflow: ${{ github.workflow }}
- Run Type: ${{ github.event_name }}
- Repository: ${{ github.repository }}
- Branch/PR: ${{ github.ref }}
- Commit: ${{ github.sha }}
You can view the full run details here:
${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
Error Information:
${{ needs.generate-matrix.result == 'failure' && 'Matrix generation failed' || '' }}
${{ needs.build.result == 'failure' && 'Build job failed' || '' }}
This is an automated notification. Please check the GitHub Actions page for more details about the failure.
37 changes: 37 additions & 0 deletions .github/workflows/build_wheels_linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ jobs:
with-cuda: enable
with-rocm: enable
with-xpu: enable
# please note: excluding 3.13t for linux builds for now
python-versions: '["3.9", "3.10", "3.11", "3.12", "3.13"]'

build:
needs: generate-matrix
Expand All @@ -56,3 +58,38 @@ jobs:
upload-to-pypi: cu121
secrets:
PYPI_API_TOKEN: ${{ secrets.PYPI_API_TOKEN }}
notify:
runs-on: ubuntu-latest
name: Email notification
needs: [generate-matrix, build]
if: failure() && github.event_name == 'schedule'
steps:
- uses: dawidd6/action-send-mail@v4
with:
server_address: smtp.gmail.com
server_port: 465
username: torchao.notify
password: ${{ secrets.TORCHAO_NOTIFY_PASSWORD }}
from: torchao.notify@gmail.com
to: ${{ secrets.TORCHAO_NOTIFY_RECIPIENT }}
subject: Scheduled Build Failure for TorchAO
body: |
Build Failure Notification for TorchAO
A failure occurred in the Build Linux Wheels workflow.
Run Details:
- Workflow: ${{ github.workflow }}
- Run Type: ${{ github.event_name }}
- Repository: ${{ github.repository }}
- Branch/PR: ${{ github.ref }}
- Commit: ${{ github.sha }}
You can view the full run details here:
${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
Error Information:
${{ needs.generate-matrix.result == 'failure' && 'Matrix generation failed' || '' }}
${{ needs.build.result == 'failure' && 'Build job failed' || '' }}
This is an automated notification. Please check the GitHub Actions page for more details about the failure.
35 changes: 35 additions & 0 deletions .github/workflows/build_wheels_windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,3 +60,38 @@ jobs:
package-name: ${{ matrix.package-name }}
smoke-test-script: ${{ matrix.smoke-test-script }}
trigger-event: ${{ github.event_name }}
notify:
runs-on: ubuntu-latest
name: Email notification
needs: [generate-matrix, build]
if: failure() && github.event_name == 'schedule'
steps:
- uses: dawidd6/action-send-mail@v4
with:
server_address: smtp.gmail.com
server_port: 465
username: torchao.notify
password: ${{ secrets.TORCHAO_NOTIFY_PASSWORD }}
from: torchao.notify@gmail.com
to: ${{ secrets.TORCHAO_NOTIFY_RECIPIENT }}
subject: Scheduled Build Failure for TorchAO
body: |
Build Failure Notification for TorchAO
A failure occurred in the Build Windows Wheels workflow.
Run Details:
- Workflow: ${{ github.workflow }}
- Run Type: ${{ github.event_name }}
- Repository: ${{ github.repository }}
- Branch/PR: ${{ github.ref }}
- Commit: ${{ github.sha }}
You can view the full run details here:
${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
Error Information:
${{ needs.generate-matrix.result == 'failure' && 'Matrix generation failed' || '' }}
${{ needs.build.result == 'failure' && 'Build job failed' || '' }}
This is an automated notification. Please check the GitHub Actions page for more details about the failure.
3 changes: 3 additions & 0 deletions .github/workflows/doc_build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ on:
- v[0-9]+.[0-9]+.[0-9]
- v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+
pull_request:
paths:
- 'docs/**'
- '!docs/**'
workflow_dispatch:

concurrency:
Expand Down
12 changes: 10 additions & 2 deletions .github/workflows/float8_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,18 @@ jobs:
include:
- name: SM-89
runs-on: linux.g6.4xlarge.experimental.nvidia.gpu
torch-spec: '--pre torch --index-url https://download.pytorch.org/whl/nightly/cu121'
torch-spec: '--pre torch==2.7.0.dev20250122 --index-url https://download.pytorch.org/whl/nightly/cu124'
gpu-arch-type: "cuda"
gpu-arch-version: "12.1"
gpu-arch-version: "12.4"
- name: H100
runs-on: linux.aws.h100
torch-spec: '--pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu124'
gpu-arch-type: "cuda"
gpu-arch-version: "12.4"

permissions:
id-token: write
contents: read
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
with:
timeout: 60
Expand Down
55 changes: 55 additions & 0 deletions .github/workflows/float8nocompile_test.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# Installs a nightly torch build plus torchao from source on a GPU runner and
# runs the float8nocompile prototype's kernel and training tests.
name: Run Float8nocompile Tests

on:
  push:
    branches:
      - main
      - 'gh/**'
    paths:
      # NOTE(review): the negated pattern below repeats the positive pattern
      # above it, so every path that matches is excluded again and this
      # filter can never match. Confirm this is an intentional way to keep
      # the trigger disabled; drop the '!' line to re-enable it.
      - 'torchao/prototype/float8nocompile/**'
      - '!torchao/prototype/float8nocompile/**'
  pull_request:
    branches:
      - main
      - 'gh/**'
    paths:
      # NOTE(review): same self-cancelling include/exclude pair as the push
      # trigger above — confirm it is intentional.
      - 'torchao/prototype/float8nocompile/**'
      - '!torchao/prototype/float8nocompile/**'

# On main the group key contains the run number, so every run gets its own
# group; on any other ref the key is the ref itself, so a newer run cancels
# the in-progress run for the same ref.
concurrency:
  group: floatnocompile_test-${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_number || github.ref }}
  cancel-in-progress: true

env:
  HF_TOKEN: ${{ secrets.HF_TOKEN }}

jobs:
  test:
    strategy:
      # Let remaining matrix entries finish even if one of them fails.
      fail-fast: false
      matrix:
        include:
          - name: SM-89
            runs-on: linux.g6.4xlarge.experimental.nvidia.gpu
            torch-spec: '--pre torch --index-url https://download.pytorch.org/whl/nightly/cu121'
            gpu-arch-type: "cuda"
            gpu-arch-version: "12.1"

    # Grant the called reusable workflow an OIDC token and read access to the
    # repository contents, matching the other callers of linux_job_v2.yml.
    permissions:
      id-token: write
      contents: read
    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
    with:
      timeout: 300
      runner: ${{ matrix.runs-on }}
      gpu-arch-type: ${{ matrix.gpu-arch-type }}
      gpu-arch-version: ${{ matrix.gpu-arch-version }}
      submodules: recursive
      script: |
        conda create -n venv python=3.9 -y
        conda activate venv
        export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH
        python -m pip install --upgrade pip
        pip install ${{ matrix.torch-spec }}
        pip install -r dev-requirements.txt
        pip install .
        cd torchao/prototype/float8nocompile
        pytest kernels/ --verbose -s
        pytest test/train_test.py --verbose -s
10 changes: 6 additions & 4 deletions .github/workflows/nightly_smoke_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ concurrency:
cancel-in-progress: true

env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
HF_TOKEN: ${{ secrets.HF_TOKEN }}

jobs:
test:
Expand All @@ -21,11 +21,13 @@ jobs:
include:
- name: CUDA Nightly
runs-on: linux.g5.12xlarge.nvidia.gpu
torch-spec: '--pre torch --index-url https://download.pytorch.org/whl/nightly/cu121'
torch-spec: '--pre torch==2.7.0.dev20250122 --index-url https://download.pytorch.org/whl/nightly/cu124'
gpu-arch-type: "cuda"
gpu-arch-version: "12.1"

gpu-arch-version: "12.4"

permissions:
id-token: write
contents: read
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
with:
runner: ${{ matrix.runs-on }}
Expand Down
7 changes: 5 additions & 2 deletions .github/workflows/regression_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,18 @@ jobs:
include:
- name: CUDA Nightly
runs-on: linux.g5.12xlarge.nvidia.gpu
torch-spec: '--pre torch --index-url https://download.pytorch.org/whl/nightly/cu124'
torch-spec: '--pre torch==2.7.0.dev20250122 --index-url https://download.pytorch.org/whl/nightly/cu124'
gpu-arch-type: "cuda"
gpu-arch-version: "12.4"
- name: CPU Nightly
runs-on: linux.4xlarge
torch-spec: '--pre torch --index-url https://download.pytorch.org/whl/nightly/cpu'
torch-spec: '--pre torch==2.7.0.dev20250122 --index-url https://download.pytorch.org/whl/nightly/cpu'
gpu-arch-type: "cpu"
gpu-arch-version: ""

permissions:
id-token: write
contents: read
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
with:
timeout: 120
Expand Down
49 changes: 49 additions & 0 deletions .github/workflows/regression_test_rocm.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Regression test workflow for ROCm: installs a pinned nightly torch build
# plus torchao from source, then runs the `test` directory with pytest.
name: Run Regression Tests on ROCm

# Triggered by pushes to main and by pushes of ciflow/rocm/* tags.
on:
  push:
    branches:
      - main
    tags:
      - ciflow/rocm/*

# On main the group key contains the run number, so every run gets its own
# group; on any other ref the key is the ref itself, so a newer run cancels
# the in-progress run for the same ref.
concurrency:
  group: regression_test-${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_number || github.ref }}
  cancel-in-progress: true

env:
  HF_TOKEN: ${{ secrets.HF_TOKEN }}

jobs:
  test-nightly:
    strategy:
      # Let remaining matrix entries finish even if one of them fails.
      fail-fast: false
      matrix:
        include:
          - name: ROCM Nightly
            runs-on: linux.rocm.gpu.torchao
            torch-spec: '--pre torch==2.7.0.dev20250122 --index-url https://download.pytorch.org/whl/nightly/rocm6.3'
            gpu-arch-type: "rocm"
            gpu-arch-version: "6.3"

    permissions:
      id-token: write
      contents: read
    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
    with:
      timeout: 120
      # Evaluates to true only for matrix entries whose gpu-arch-type is 'rocm'.
      no-sudo: ${{ matrix.gpu-arch-type == 'rocm' }}
      runner: ${{ matrix.runs-on }}
      gpu-arch-type: ${{ matrix.gpu-arch-type }}
      gpu-arch-version: ${{ matrix.gpu-arch-version }}
      submodules: recursive
      script: |
        conda create -n venv python=3.9 -y
        conda activate venv
        python -m pip install --upgrade pip
        pip install ${{ matrix.torch-spec }}
        pip install -r dev-requirements.txt
        pip install .
        export CONDA=$(dirname $(dirname $(which conda)))
        export LD_LIBRARY_PATH=$CONDA/lib/:$LD_LIBRARY_PATH
        pytest test --verbose -s
Loading

0 comments on commit f18043d

Please sign in to comment.