Merge remote-tracking branch 'origin/main' into fp8_check
jainapurva committed Nov 25, 2024
2 parents 436d3aa + 9bb1b23 commit dfe2eb7
Showing 52 changed files with 3,222 additions and 368 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/build_wheels_linux.yml
@@ -43,6 +43,8 @@ jobs:
       # triggered daily from main with a schedule
       repository: pytorch/ao
       ref: ""
+      test-infra-repository: pytorch/test-infra
+      test-infra-ref: main
       build-matrix: ${{ needs.generate-matrix.outputs.matrix }}
       env-var-script: packaging/env_var_script_linux.sh
       pre-script: packaging/pre_build_script.sh
6 changes: 3 additions & 3 deletions .github/workflows/regression_test.yml
@@ -25,12 +25,12 @@ jobs:
        include:
          - name: CUDA Nightly
            runs-on: linux.g5.12xlarge.nvidia.gpu
-           torch-spec: '--pre torch==2.6.0.dev20241101 --index-url https://download.pytorch.org/whl/nightly/cu121'
+           torch-spec: '--pre torch --index-url https://download.pytorch.org/whl/nightly/cu124'
            gpu-arch-type: "cuda"
-           gpu-arch-version: "12.1"
+           gpu-arch-version: "12.4"
          - name: CPU Nightly
            runs-on: linux.4xlarge
-           torch-spec: '--pre torch==2.6.0.dev20241101 --index-url https://download.pytorch.org/whl/nightly/cpu'
+           torch-spec: '--pre torch --index-url https://download.pytorch.org/whl/nightly/cpu'
            gpu-arch-type: "cpu"
            gpu-arch-version: ""
 
5 changes: 4 additions & 1 deletion .gitignore
@@ -371,4 +371,7 @@ venv/
 sweep/
 
 # Model checkpoints
-checkpoints/
+checkpoints/
+
+# Experimental
+torchao/experimental/cmake-out
2 changes: 1 addition & 1 deletion README.md
@@ -177,7 +177,7 @@ We're also fortunate to be integrated into some of the leading open-source libra
 2. Hugging Face diffusers best practices with torch.compile and torchao in a standalone repo [diffusers-torchao](https://github.com/sayakpaul/diffusers-torchao)
 3. Mobius HQQ backend leveraged our int4 kernels to get [195 tok/s on a 4090](https://github.com/mobiusml/hqq#faster-inference)
 4. [TorchTune](https://github.com/pytorch/torchtune) for our QLoRA and QAT recipes
-5. [torchchat](https://github.com/pytorch/torchtune) for post training quantization
+5. [torchchat](https://github.com/pytorch/torchchat) for post training quantization
 6. [SGLang](https://github.com/sgl-project/sglang/pull/1341) for LLM inference quantization
 
 ## Videos
10 changes: 5 additions & 5 deletions examples/sam2_amg_server/README.md
@@ -8,7 +8,7 @@ curl -X POST http://127.0.0.1:5000/upload -F 'image=@/path/to/file.jpg' --output
 Start the server
 
 ```
-python server.py ~/checkpoints/sam2 --port <your_port> --host <your_hostname> --fast
+python server.py ~/checkpoints/sam2 large --port <your_port> --host <your_hostname> --fast
 ```
 
 Collect the rles
@@ -58,7 +58,7 @@ Make sure you've installed https://github.com/facebookresearch/sam2
 
 Start server
 ```
-python server.py ~/checkpoints/sam2 --port <your_port> --host <your_hostname> --baseline
+python server.py ~/checkpoints/sam2 large --port <your_port> --host <your_hostname> --baseline
 ```
 
 Generate and save rles (one line per json via `-w "\n"`)
@@ -73,7 +73,7 @@ sys 0m4.137s
 ### 3. Start server with torchao variant of SAM2
 Start server
 ```
-python server.py ~/checkpoints/sam2 --port <your_port> --host <your_hostname>
+python server.py ~/checkpoints/sam2 large --port <your_port> --host <your_hostname>
 ```
 
 Generate and save rles (one line per json via `-w "\n"`)
@@ -88,7 +88,7 @@ sys 0m4.350s
 ### 4. Start server with torchao variant of SAM2 and `--fast` optimizations
 Start server
 ```
-python server.py ~/checkpoints/sam2 --port <your_port> --host <your_hostname> --fast
+python server.py ~/checkpoints/sam2 large --port <your_port> --host <your_hostname> --fast
 ```
 
 Generate and save rles (one line per json via `-w "\n"`)
@@ -103,7 +103,7 @@ sys 0m4.138s
 ### 5. Start server with torchao variant of SAM2 and `--fast` and `--furious` optimizations
 Start server
 ```
-python server.py ~/checkpoints/sam2 --port <your_port> --host <your_hostname> --fast --furious
+python server.py ~/checkpoints/sam2 large --port <your_port> --host <your_hostname> --fast --furious
 ```
 
 Generate and save rles (one line per json via `-w "\n"`)
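
Each command above now passes a model size (`large`) as the second positional argument. As a quick reference, the accepted sizes and the checkpoints they resolve to, mirrored from `MODEL_TYPES_TO_MODEL` in the `server.py` changes later in this commit:

```python
# Quick reference only; the authoritative mapping lives in server.py below.
MODEL_SIZES = {
    "tiny": "sam2.1_hiera_tiny.pt",
    "small": "sam2.1_hiera_small.pt",
    "plus": "sam2.1_hiera_base_plus.pt",
    "large": "sam2.1_hiera_large.pt",
}
```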
48 changes: 48 additions & 0 deletions examples/sam2_amg_server/cli.py
@@ -0,0 +1,48 @@
import fire
import logging
import matplotlib.pyplot as plt
from server import file_bytes_to_image_tensor
from server import show_anns
from server import model_type_to_paths
from server import MODEL_TYPES_TO_MODEL
from torchao._models.sam2.build_sam import build_sam2
from torchao._models.sam2.automatic_mask_generator import SAM2AutomaticMaskGenerator
from torchao._models.sam2.utils.amg import rle_to_mask
from io import BytesIO

def main_docstring():
    return f"""
    Args:
        checkpoint_path (str): Path to folder containing checkpoints from https://github.com/facebookresearch/sam2?tab=readme-ov-file#download-checkpoints
        model_type (str): Choose from one of {", ".join(MODEL_TYPES_TO_MODEL.keys())}
        input_path (str): Path to input image
        output_path (str): Path to output image
    """

def main(checkpoint_path, model_type, input_path, output_path, points_per_batch=1024, output_format='png', verbose=False):
    device = "cuda"
    sam2_checkpoint, model_cfg = model_type_to_paths(checkpoint_path, model_type)
    if verbose:
        print(f"Loading model {sam2_checkpoint} with config {model_cfg}")
    sam2 = build_sam2(model_cfg, sam2_checkpoint, device=device, apply_postprocessing=False)
    mask_generator = SAM2AutomaticMaskGenerator(sam2, points_per_batch=points_per_batch, output_mode="uncompressed_rle")
    image_tensor = file_bytes_to_image_tensor(bytearray(open(input_path, 'rb').read()))
    if verbose:
        print(f"Loaded image of size {tuple(image_tensor.shape)} and generating mask.")
    masks = mask_generator.generate(image_tensor)

    # Save an example
    plt.figure(figsize=(image_tensor.shape[1]/100., image_tensor.shape[0]/100.), dpi=100)
    plt.imshow(image_tensor)
    show_anns(masks, rle_to_mask)
    plt.axis('off')
    plt.tight_layout()
    buf = BytesIO()
    plt.savefig(buf, format=output_format)
    buf.seek(0)
    with open(output_path, "wb") as file:
        file.write(buf.getvalue())

main.__doc__ = main_docstring()
if __name__ == "__main__":
    fire.Fire(main)
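
Since the entry point is plain `fire.Fire(main)`, the same function can also be driven from Python. A minimal sketch; the checkpoint folder and image file names here are hypothetical placeholders:

```python
# Hypothetical invocation of cli.main; adjust paths to your environment.
from cli import main

main(
    checkpoint_path="/path/to/checkpoints/sam2",  # folder holding SAM2 checkpoints
    model_type="large",                           # one of: tiny, small, plus, large
    input_path="dog.jpg",
    output_path="dog_masks.png",
    points_per_batch=1024,
    verbose=True,
)
```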
1 change: 1 addition & 0 deletions examples/sam2_amg_server/requirements.txt
@@ -7,3 +7,4 @@ hydra-core
 tqdm
 iopath
 python-multipart
+requests
67 changes: 64 additions & 3 deletions examples/sam2_amg_server/server.py
@@ -1,4 +1,5 @@
 import itertools
+import requests
 import uvicorn
 import fire
 import tempfile
@@ -37,6 +38,23 @@
 # torch._dynamo.config.capture_dynamic_output_shape_ops = True
 torch._dynamo.config.capture_dynamic_output_shape_ops = True
 
+def download_file(url, download_dir):
+    # Create the directory if it doesn't exist
+    download_dir = Path(download_dir)
+    download_dir.mkdir(parents=True, exist_ok=True)
+    # Extract the file name from the URL
+    file_name = url.split('/')[-1]
+    # Define the full path for the downloaded file
+    file_path = download_dir / file_name
+    # Download the file
+    response = requests.get(url, stream=True)
+    response.raise_for_status()  # Raise an error for bad responses
+    # Write the file to the specified directory
+    print(f"Downloading '{file_name}' to '{download_dir}'")
+    with open(file_path, 'wb') as file:
+        for chunk in response.iter_content(chunk_size=8192):
+            file.write(chunk)
+    print(f"Downloaded '{file_name}' to '{download_dir}'")
 
 def example_shapes():
     return [(848, 480, 3),
@@ -272,7 +290,51 @@ def unittest_fn(masks, ref_masks, order_by_area=False, verbose=False):
     print(f"mIoU is {miou} with equal count {equal_count} out of {len(masks)}")
 
 
+MODEL_TYPES_TO_CONFIG = {
+    "tiny": "sam2.1_hiera_t.yaml",
+    "small": "sam2.1_hiera_s.yaml",
+    "plus": "sam2.1_hiera_b+.yaml",
+    "large": "sam2.1_hiera_l.yaml",
+}
+
+MODEL_TYPES_TO_MODEL = {
+    "tiny": "sam2.1_hiera_tiny.pt",
+    "small": "sam2.1_hiera_small.pt",
+    "plus": "sam2.1_hiera_base_plus.pt",
+    "large": "sam2.1_hiera_large.pt",
+}
+
+
+MODEL_TYPES_TO_URL = {
+    "tiny": "https://dl.fbaipublicfiles.com/segment_anything_2/092824/sam2.1_hiera_tiny.pt",
+    "small": "https://dl.fbaipublicfiles.com/segment_anything_2/092824/sam2.1_hiera_small.pt",
+    "plus": "https://dl.fbaipublicfiles.com/segment_anything_2/092824/sam2.1_hiera_base_plus.pt",
+    "large": "https://dl.fbaipublicfiles.com/segment_anything_2/092824/sam2.1_hiera_large.pt",
+}
+
+
+def main_docstring():
+    return f"""
+    Args:
+        checkpoint_path (str): Path to folder containing checkpoints from https://github.com/facebookresearch/sam2?tab=readme-ov-file#download-checkpoints
+        model_type (str): Choose from one of {", ".join(MODEL_TYPES_TO_MODEL.keys())}
+    """
+
+
+def model_type_to_paths(checkpoint_path, model_type):
+    if model_type not in MODEL_TYPES_TO_CONFIG.keys():
+        raise ValueError(f"Expected model_type to be one of {', '.join(MODEL_TYPES_TO_MODEL.keys())} but got {model_type}")
+    sam2_checkpoint = Path(checkpoint_path) / Path(MODEL_TYPES_TO_MODEL[model_type])
+    if not sam2_checkpoint.exists():
+        print(f"Can't find checkpoint {sam2_checkpoint} in folder {checkpoint_path}. Downloading.")
+        download_file(MODEL_TYPES_TO_URL[model_type], checkpoint_path)
+        assert sam2_checkpoint.exists(), "Can't find downloaded file. Please open an issue."
+    model_cfg = f"configs/sam2.1/{MODEL_TYPES_TO_CONFIG[model_type]}"
+    return sam2_checkpoint, model_cfg
+
+
 def main(checkpoint_path,
+         model_type,
          baseline=False,
          fast=False,
          furious=False,
@@ -306,9 +368,7 @@ def main(checkpoint_path,
     from torchao._models.sam2.utils.amg import rle_to_mask
 
     device = "cuda"
-    from pathlib import Path
-    sam2_checkpoint = Path(checkpoint_path) / Path("sam2.1_hiera_large.pt")
-    model_cfg = "configs/sam2.1/sam2.1_hiera_l.yaml"
+    sam2_checkpoint, model_cfg = model_type_to_paths(checkpoint_path, model_type)
 
     logging.info(f"Loading model {sam2_checkpoint} with config {model_cfg}")
     sam2 = build_sam2(model_cfg, sam2_checkpoint, device=device, apply_postprocessing=False)
@@ -450,5 +510,6 @@ async def upload_image(image: UploadFile = File(...)):
     # uvicorn.run(app, host=host, port=port, log_level="info")
     uvicorn.run(app, host=host, port=port)
 
+main.__doc__ = main_docstring()
 if __name__ == "__main__":
     fire.Fire(main)
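
Taken together, these additions give `server.py` lazy checkpoint fetching: a short model size resolves to a checkpoint file and Hydra config, downloading the checkpoint on first use. A sketch of the flow, with a hypothetical checkpoint folder:

```python
# "/path/to/checkpoints/sam2" is a placeholder folder.
sam2_checkpoint, model_cfg = model_type_to_paths("/path/to/checkpoints/sam2", "large")
# model_type_to_paths validates "large" against MODEL_TYPES_TO_CONFIG, calls
# download_file(MODEL_TYPES_TO_URL["large"], ...) if sam2.1_hiera_large.pt is
# missing, and returns:
#   sam2_checkpoint -> /path/to/checkpoints/sam2/sam2.1_hiera_large.pt
#   model_cfg       -> "configs/sam2.1/sam2.1_hiera_l.yaml"
```
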
3 changes: 3 additions & 0 deletions test/dtypes/test_affine_quantized.py
@@ -156,6 +156,9 @@ class TestAffineQuantizedBasic(TestCase):
     @common_utils.parametrize("device", COMMON_DEVICES)
     @common_utils.parametrize("dtype", COMMON_DTYPES)
     def test_flatten_unflatten(self, apply_quant, device, dtype):
+        if device == "cpu":
+            self.skipTest(f"Temporarily skipping for {device}")
+
         linear = torch.nn.Linear(128, 256, dtype=dtype, device=device)
         ql = apply_quant(linear)
         lp_tensor = ql.weight
25 changes: 1 addition & 24 deletions test/float8/test_dtensor.py
@@ -15,8 +15,6 @@
 
 import pytest
 import torch
-import torch.nn as nn
-import torch.nn.functional as F
 
 from torchao.utils import TORCH_VERSION_AT_LEAST_2_5
 
@@ -49,6 +47,7 @@
 )
 from torchao.float8.float8_utils import e4m3_dtype, tensor_to_scale
 from torchao.float8.fsdp_utils import WeightWithDynamicFloat8CastTensor
+from torchao.testing.float8.dtensor_utils import ToyModel
 
 
 def setup_distributed():
@@ -59,28 +58,6 @@ def setup_distributed():
     return device_mesh
 
 
-class FeedForward(nn.Module):
-    """MLP based model"""
-
-    def __init__(self):
-        super(FeedForward, self).__init__()
-        self.w1 = nn.Linear(16, 32, bias=False)
-        self.w2 = nn.Linear(16, 32, bias=False)
-        self.out_proj = nn.Linear(32, 16, bias=False)
-
-    def forward(self, x):
-        return self.out_proj(F.silu(self.w1(x)) * self.w2(x))
-
-
-class ToyModel(nn.Module):
-    def __init__(self):
-        super(ToyModel, self).__init__()
-        self.ffn = FeedForward()
-
-    def forward(self, x):
-        return self.ffn(x)
-
-
 def _test_scaled_mm(mesh: DeviceMesh, size=16):
     device = mesh.device_type
     fp8_dtype = e4m3_dtype
4 changes: 4 additions & 0 deletions test/float8/test_dtensor.sh
@@ -8,4 +8,8 @@ if python -c 'import torch;print(torch.cuda.is_available())' | grep -q "False";
     exit
 fi
 
+# integration tests for TP/SP
 NCCL_DEBUG=WARN torchrun --nproc_per_node 2 test/float8/test_dtensor.py
+
+# integration smoke tests for FSDP2 + TP
+NCCL_DEBUG=WARN torchrun --nproc_per_node 4 test/float8/test_fsdp2_tp.py