From 439d61f79cf55d5d0b28334f577b6ac3c5ced28f Mon Sep 17 00:00:00 2001
From: Ella Charlaix <80481427+echarlaix@users.noreply.github.com>
Date: Wed, 10 Jul 2024 18:32:36 +0200
Subject: [PATCH] Set default 4-bit compression ratio to 1.0 (#815)

* Set default 4-bit compression ratio to 1.0

* update doc

* set default ratio using default config
---
 docs/source/openvino/export.mdx     | 2 +-
 optimum/commands/export/openvino.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/source/openvino/export.mdx b/docs/source/openvino/export.mdx
index eed980076d..c11016fde1 100644
--- a/docs/source/openvino/export.mdx
+++ b/docs/source/openvino/export.mdx
@@ -60,7 +60,7 @@ Optional arguments:
   --pad-token-id PAD_TOKEN_ID
                         This is needed by some models, for some tasks. If not provided, will attempt to use the tokenizer to guess it.
   --ratio RATIO         A parameter used when applying 4-bit quantization to control the ratio between 4-bit and 8-bit quantization. If set to 0.8, 80% of the layers will be quantized to int4 while
-                        20% will be quantized to int8. This helps to achieve better accuracy at the sacrifice of the model size and inference latency. Default value is 0.8.
+                        20% will be quantized to int8. This helps to achieve better accuracy at the sacrifice of the model size and inference latency. Default value is 1.0.
   --sym                 Whether to apply symmetric quantization
   --group-size GROUP_SIZE
                         The group size to use for int4 quantization. Recommended value is 128 and -1 will results in per-column quantization.
diff --git a/optimum/commands/export/openvino.py b/optimum/commands/export/openvino.py
index ee1f62388f..2bdee32e17 100644
--- a/optimum/commands/export/openvino.py
+++ b/optimum/commands/export/openvino.py
@@ -102,7 +102,7 @@ def parse_args_openvino(parser: "ArgumentParser"):
        default=None,
        help=(
            "A parameter used when applying 4-bit quantization to control the ratio between 4-bit and 8-bit quantization. If set to 0.8, 80%% of the layers will be quantized to int4 "
-            "while 20%% will be quantized to int8. This helps to achieve better accuracy at the sacrifice of the model size and inference latency. Default value is 0.8."
+            "while 20%% will be quantized to int8. This helps to achieve better accuracy at the sacrifice of the model size and inference latency. Default value is 1.0."
        ),
    )
    optional_group.add_argument(
@@ -277,7 +277,7 @@ def _get_default_int4_config(model_id_or_path, library_name):
        else:
            quantization_config = {
                "bits": 8 if is_int8 else 4,
-                "ratio": 1 if is_int8 else (self.args.ratio or 0.8),
+                "ratio": 1 if is_int8 else (self.args.ratio or _DEFAULT_4BIT_CONFIG["ratio"]),
                "sym": self.args.sym or False,
                "group_size": -1 if is_int8 else self.args.group_size,
                "all_layers": None if is_int8 else self.args.all_layers,