Skip to content

Commit

Permalink
Add option to quantize Conv operators in ort-quantize.py
Browse files Browse the repository at this point in the history
  • Loading branch information
robertknight committed Feb 3, 2025
1 parent e14fec3 commit 4270f88
Showing 1 changed file with 14 additions and 6 deletions.
20 changes: 14 additions & 6 deletions tools/ort-quantize.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,19 @@
import onnx
from onnxruntime.quantization import quantize_dynamic

# Command-line interface.
#
# NOTE(review): the duplicate bare `ArgumentParser()` construction (a stale
# line left over from a diff) has been removed — only the configured parser
# is kept.
parser = ArgumentParser(description="Quantize ONNX models using dynamic quantization.")
parser.add_argument("input", help="Path to the ONNX model to quantize")
parser.add_argument(
    "output",
    nargs="?",
    help="Output path for the quantized model (default: derived from input)",
)
parser.add_argument(
    "--quantize-conv",
    action="store_true",
    help="""
Enable quantization of `Conv` operators.

This is disabled by default to avoid producing models that don't work
in ONNX Runtime. See https://github.com/microsoft/onnxruntime/issues/15888.
""",
)
args = parser.parse_args()

# Default output path: replace the ".onnx" suffix of the input with
# ".quant.onnx" when no explicit output path was given.
output = args.output or args.input.replace(".onnx", ".quant.onnx")
Expand All @@ -25,13 +35,11 @@
"Transpose",
# Supported ops from `IntegerOpsRegistry`. These get replaced during quantization.
"MatMul", # Replaced by MatMulInteger
# "Conv" - Replaced by ConvInteger, which is not implemented yet.
#
# ConvInteger ops produced by `quantize_dynamic` also don't work in ORT
# due to the input data type combination being unsupported.
# See https://github.com/microsoft/onnxruntime/issues/15888 .
]

# Conv quantization is opt-in; when requested, `Conv` ops are rewritten to
# `ConvInteger` during quantization.
if args.quantize_conv:
    op_types_to_quantize += ["Conv"]

quantize_dynamic(
args.input,
output,
Expand Down

0 comments on commit 4270f88

Please sign in to comment.