Skip to content

Commit

Permalink
Add option to quantize Conv operators in ort-quantize.py
Browse files Browse the repository at this point in the history
  • Loading branch information
robertknight committed Feb 3, 2025
1 parent e14fec3 commit 4270f88
Showing 1 changed file with 14 additions and 6 deletions.
20 changes: 14 additions & 6 deletions tools/ort-quantize.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,19 @@
import onnx
from onnxruntime.quantization import quantize_dynamic

# Command-line interface.
#
# NOTE(review): the duplicate bare `ArgumentParser()` construction (a stale
# line left over from a diff) has been removed — only the configured parser
# is kept.
parser = ArgumentParser(description="Quantize ONNX models using dynamic quantization.")
parser.add_argument("input", help="Path to the ONNX model to quantize")
parser.add_argument(
    "output",
    nargs="?",
    help="Output path for the quantized model (default: derived from input)",
)
parser.add_argument(
    "--quantize-conv",
    action="store_true",
    help="""
Enable quantization of `Conv` operators.

This is disabled by default to avoid producing models that don't work
in ONNX Runtime. See https://github.com/microsoft/onnxruntime/issues/15888.
""",
)
args = parser.parse_args()

# Default output path: replace the ".onnx" suffix of the input with
# ".quant.onnx" when no explicit output path was given.
output = args.output or args.input.replace(".onnx", ".quant.onnx")
Expand All @@ -25,13 +35,11 @@
"Transpose",
# Supported ops from `IntegerOpsRegistry`. These get replaced during quantization.
"MatMul", # Replaced by MatMulInteger
# "Conv" - Replaced by ConvInteger, which is not implemented yet.
#
# ConvInteger ops produced by `quantize_dynamic` also don't work in ORT
# due to the input data type combination being unsupported.
# See https://github.com/microsoft/onnxruntime/issues/15888 .
]

# Conv quantization is opt-in; when requested, `Conv` ops are rewritten to
# `ConvInteger` during quantization.
if args.quantize_conv:
    op_types_to_quantize += ["Conv"]

quantize_dynamic(
args.input,
output,
Expand Down

0 comments on commit 4270f88

Please sign in to comment.