Fix potential out-of-bound access in int8_mm.py (#1751)

* fix potential out-of-bound access
* remove unused EVEN_K
* refactor fix with triton.heuristics
* restore EVEN_K as an input
* fix typo
* fix another typo
* ruff reformatted
pytorch · Feb 25, 2025 · 98c4e2e · 98c4e2e
1 parent 38e36de
commit 98c4e2e
Showing 1 changed file with 1 addition and 1 deletion.
diff --git a/torchao/prototype/quantized_training/int8_mm.py b/torchao/prototype/quantized_training/int8_mm.py
@@ -54,6 +54,7 @@
 
 
 @triton.autotune(configs=configs, key=["M", "N", "K", "stride_ak", "stride_bk"])
+@triton.heuristics({"EVEN_K": lambda args: args["K"] % args["BLOCK_K"] == 0})
 @triton.jit
 def _scaled_int8_mm_kernel(
     A_ptr,
@@ -176,7 +177,6 @@ def scaled_int8_mm_cuda(A: Tensor, B: Tensor, row_scale: Tensor, col_scale: Tens
         *A.stride(),
         *B.stride(),
         *C.stride(),
-        EVEN_K=K % 2 == 0,
         COL_SCALE_SCALAR=col_scale.numel() == 1,
     )
     return C