
Commit

Merge branch 'huggingface:main' into main
rbrugaro authored Jul 19, 2024
2 parents 138b1e4 + b362180 commit 86930cd
Showing 14 changed files with 272 additions and 169 deletions.
40 changes: 12 additions & 28 deletions optimum/commands/export/openvino.py
@@ -189,14 +189,6 @@ def parse_args_openvino(parser: "ArgumentParser"):
action="store_true",
help="Do not add converted tokenizer and detokenizer OpenVINO models.",
)
# TODO : deprecated
optional_group.add_argument("--fp16", action="store_true", help="Compress weights to fp16")
optional_group.add_argument("--int8", action="store_true", help="Compress weights to int8")
optional_group.add_argument(
"--convert-tokenizer",
action="store_true",
help="[Deprecated] Add converted tokenizer and detokenizer with OpenVINO Tokenizers.",
)


class OVExportCommand(BaseOptimumCLICommand):
@@ -235,24 +227,19 @@ def _get_default_int4_config(model_id_or_path, library_name):

return _DEFAULT_4BIT_CONFIG

library_name = TasksManager.infer_library_from_model(self.args.model, library_name=self.args.library)
if library_name == "sentence_transformers" and self.args.library is None:
logger.warning(
"Library name is not specified. There are multiple possible variants: `sentence_transformers`, `transformers`."
"`transformers` will be selected. If you want to load your model with the `sentence-transformers` library instead, please set --library sentence_transformers"
)
library_name = "transformers"

if self.args.fp16:
logger.warning(
"`--fp16` option is deprecated and will be removed in a future version. Use `--weight-format` instead."
if self.args.library is None:
# TODO: add revision, subfolder and token to args
library_name = TasksManager._infer_library_from_model_name_or_path(
model_name_or_path=self.args.model, cache_dir=self.args.cache_dir
)
self.args.weight_format = "fp16"
if self.args.int8:
logger.warning(
"`--int8` option is deprecated and will be removed in a future version. Use `--weight-format` instead."
)
self.args.weight_format = "int8"
if library_name == "sentence_transformers":
logger.warning(
"Library name is not specified. There are multiple possible variants: `sentence_transformers`, `transformers`."
"`transformers` will be selected. If you want to load your model with the `sentence-transformers` library instead, please set --library sentence_transformers"
)
library_name = "transformers"
else:
library_name = self.args.library

if self.args.weight_format is None:
ov_config = None
@@ -296,9 +283,6 @@ def _get_default_int4_config(model_id_or_path, library_name):
quantization_config["group_size"] = 128 if "128" in self.args.weight_format else 64
ov_config = OVConfig(quantization_config=quantization_config)

if self.args.convert_tokenizer:
logger.warning("`--convert-tokenizer` option is deprecated. Tokenizer will be converted by default.")

quantization_config = ov_config.quantization_config if ov_config else None
quantize_with_dataset = quantization_config and getattr(quantization_config, "dataset", None) is not None
task = infer_task(self.args.task, self.args.model)
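For reference, the shim deleted above only rewrote the deprecated flags into the surviving `--weight-format` option (e.g. `optimum-cli export openvino --model <model_id> --weight-format int8 <output_dir>` replaces `--int8`). A minimal sketch of that mapping, with an illustrative helper name:

from typing import Optional

def map_deprecated_flags(fp16: bool, int8: bool) -> Optional[str]:
    # Mirrors the removed back-compat logic: each deprecated flag was an
    # alias for a --weight-format value.
    if fp16:
        return "fp16"  # formerly --fp16
    if int8:
        return "int8"  # formerly --int8
    return None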
49 changes: 34 additions & 15 deletions optimum/exporters/openvino/__main__.py
@@ -47,11 +47,24 @@
logger = logging.getLogger(__name__)


def infer_task(task, model_name_or_path):
def infer_task(
task,
model_name_or_path,
subfolder: str = "",
revision: Optional[str] = None,
cache_dir: str = HUGGINGFACE_HUB_CACHE,
token: Optional[Union[bool, str]] = None,
):
task = TasksManager.map_from_synonym(task)
if task == "auto":
try:
task = TasksManager.infer_task_from_model(model_name_or_path)
task = TasksManager._infer_task_from_model_name_or_path(
model_name_or_path=model_name_or_path,
subfolder=subfolder,
revision=revision,
cache_dir=cache_dir,
token=token,
)
except KeyError as e:
raise KeyError(
f"The task could not be automatically inferred. Please provide the argument --task with the relevant task from {', '.join(TasksManager.get_all_tasks())}. Detailed error: {e}"
@@ -193,19 +206,27 @@ def main_export(
ov_config = OVConfig(quantization_config=q_config)

original_task = task
task = infer_task(task, model_name_or_path)
framework = TasksManager.determine_framework(model_name_or_path, subfolder=subfolder, framework=framework)
library_name_is_not_provided = library_name is None
library_name = TasksManager.infer_library_from_model(
model_name_or_path, subfolder=subfolder, library_name=library_name
task = infer_task(
task, model_name_or_path, subfolder=subfolder, revision=revision, cache_dir=cache_dir, token=token
)
framework = TasksManager.determine_framework(
model_name_or_path, subfolder=subfolder, revision=revision, cache_dir=cache_dir, token=token
)

if library_name == "sentence_transformers" and library_name_is_not_provided:
logger.warning(
"Library name is not specified. There are multiple possible variants: `sentence_tenasformers`, `transformers`."
"`transformers` will be selected. If you want to load your model with the `sentence-transformers` library instead, please set --library sentence_transformers"
if library_name is None:
library_name = TasksManager._infer_library_from_model_name_or_path(
model_name_or_path=model_name_or_path,
subfolder=subfolder,
revision=revision,
cache_dir=cache_dir,
token=token,
)
library_name = "transformers"
if library_name == "sentence_transformers":
logger.warning(
"Library name is not specified. There are multiple possible variants: `sentence_tenasformers`, `transformers`."
"`transformers` will be selected. If you want to load your model with the `sentence-transformers` library instead, please set --library sentence_transformers"
)
library_name = "transformers"

do_gptq_patching = False
custom_architecture = False
@@ -317,9 +338,7 @@ class StoreAttr(object):
)
model.config.pad_token_id = pad_token_id

if "stable-diffusion" in task:
model_type = "stable-diffusion"
elif hasattr(model.config, "export_model_type"):
if hasattr(model.config, "export_model_type"):
model_type = model.config.export_model_type.replace("_", "-")
else:
model_type = model.config.model_type.replace("_", "-")
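The reworked `infer_task` now threads the Hub-related arguments (`subfolder`, `revision`, `cache_dir`, `token`) through to `TasksManager`, so task inference also works for pinned revisions and gated models. A minimal usage sketch, assuming the module path from this diff and using `gpt2` purely as an illustrative model id:

from optimum.exporters.openvino.__main__ import infer_task

# task="auto" triggers inference from the model on the Hub; revision and
# token are forwarded to the underlying TasksManager helper.
task = infer_task("auto", "gpt2", revision=None, token=None)
print(task)  # e.g. "text-generation"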
63 changes: 49 additions & 14 deletions optimum/exporters/openvino/convert.py
@@ -64,7 +64,7 @@
from transformers.modeling_utils import PreTrainedModel

if is_diffusers_available():
from diffusers import ModelMixin
from diffusers import DiffusionPipeline, ModelMixin

if is_tf_available():
from transformers.modeling_tf_utils import TFPreTrainedModel
@@ -74,7 +74,7 @@
from optimum.intel.openvino.configuration import OVConfig


def _save_model(model, path: str, ov_config: Optional["OVConfig"] = None):
def _save_model(model, path: str, ov_config: Optional["OVConfig"] = None, library_name: Optional[str] = None):
compress_to_fp16 = False

if ov_config is not None:
Expand All @@ -90,13 +90,12 @@ def _save_model(model, path: str, ov_config: Optional["OVConfig"] = None):

compress_to_fp16 = ov_config.dtype == "fp16"

library_name = TasksManager.infer_library_from_model(Path(path).parent)
model = _add_version_info_to_model(model, library_name)
save_model(model, path, compress_to_fp16)


def export(
model: Union["PreTrainedModel", "TFPreTrainedModel", "ModelMixin"],
model: Union["PreTrainedModel", "TFPreTrainedModel", "ModelMixin", "DiffusionPipeline"],
config: OnnxConfig,
output: Path,
opset: Optional[int] = None,
@@ -139,7 +138,7 @@ def export(
)

if "diffusers" in str(model.__class__) and not is_diffusers_available():
raise ImportError("The pip package `diffusers` is required to export stable diffusion models to ONNX.")
raise ImportError("The package `diffusers` is required to export diffusion models to OpenVINO.")

if stateful:
# This will be checked anyway after the model conversion, but checking it earlier saves the user time if an unsuitable version is used
@@ -198,7 +197,19 @@ def export_tensorflow(
onnx_path = Path(output).with_suffix(".onnx")
input_names, output_names = export_tensorflow_onnx(model, config, opset, onnx_path)
ov_model = convert_model(str(onnx_path))
_save_model(ov_model, output.parent / output, ov_config=ov_config)

if model.__class__.__module__.startswith("optimum"):
# for wrapped models
library_name = TasksManager._infer_library_from_model_or_model_class(model=model.model)
else:
library_name = TasksManager._infer_library_from_model_or_model_class(model=model)

_save_model(
ov_model,
output.parent / output,
ov_config=ov_config,
library_name=library_name,
)
return input_names, output_names, True


@@ -251,7 +262,19 @@ def export_pytorch_via_onnx(
)
torch.onnx.export = orig_torch_onnx_export
ov_model = convert_model(str(onnx_output))
_save_model(ov_model, output.parent / OV_XML_FILE_NAME if output.suffix != ".xml" else output, ov_config=ov_config)

if model.__class__.__module__.startswith("optimum"):
# for wrapped models
library_name = TasksManager._infer_library_from_model_or_model_class(model=model.model)
else:
library_name = TasksManager._infer_library_from_model_or_model_class(model=model)

_save_model(
ov_model,
output.parent / OV_XML_FILE_NAME if output.suffix != ".xml" else output,
ov_config=ov_config,
library_name=library_name,
)
return input_names, output_names, True


@@ -413,7 +436,18 @@ def ts_patched_forward(*args, **kwargs):
if stateful:
patch_stateful(model.config, ov_model)

_save_model(ov_model, output, ov_config=ov_config)
if model.__module__.startswith("optimum"):
# for wrapped models like timm in optimum.intel.openvino.modeling_timm
library_name = TasksManager._infer_library_from_model_or_model_class(model=model.model)
else:
library_name = TasksManager._infer_library_from_model_or_model_class(model=model)

_save_model(
ov_model,
output,
ov_config=ov_config,
library_name=library_name,
)
clear_class_registry()
del model
gc.collect()
@@ -422,7 +456,7 @@ def ts_patched_forward(*args, **kwargs):

def export_models(
models_and_export_configs: Dict[
str, Tuple[Union["PreTrainedModel", "TFPreTrainedModel", "ModelMixin"], "OnnxConfig"]
str, Tuple[Union["PreTrainedModel", "TFPreTrainedModel", "ModelMixin", "DiffusionPipeline"], "OnnxConfig"]
],
output_dir: Path,
opset: Optional[int] = None,
@@ -491,7 +525,7 @@ def export_models(


def export_from_model(
model: Union["PreTrainedModel", "TFPreTrainedModel"],
model: Union["PreTrainedModel", "TFPreTrainedModel", "ModelMixin", "DiffusionPipeline"],
output: Union[str, Path],
task: Optional[str] = None,
ov_config: Optional["OVConfig"] = None,
@@ -505,14 +539,15 @@ def export_from_model(
trust_remote_code: bool = False,
**kwargs_shapes,
):
model_kwargs = model_kwargs or {}

if ov_config is not None and ov_config.quantization_config and not is_nncf_available():
raise ImportError(
f"Compression of the weights to {ov_config.quantization_config} requires nncf, please install it with `pip install nncf`"
)

model_kwargs = model_kwargs or {}
library_name = TasksManager._infer_library_from_model(model)
TasksManager.standardize_model_attributes(model, library_name)
library_name = TasksManager._infer_library_from_model_or_model_class(model=model)
TasksManager.standardize_model_attributes(model)

if hasattr(model.config, "export_model_type"):
model_type = model.config.export_model_type.replace("_", "-")
@@ -521,7 +556,7 @@

custom_architecture = library_name == "transformers" and model_type not in TasksManager._SUPPORTED_MODEL_TYPE

if task is not None:
if task is not None and task != "auto":
task = TasksManager.map_from_synonym(task)
else:
try:
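The repeated `__module__.startswith("optimum")` check above exists because optimum wrappers (such as the timm wrapper in `optimum.intel.openvino.modeling_timm`) hold the real model under `.model`, and library inference must run against the wrapped instance. A condensed sketch of that dispatch; the helper name is illustrative, and `_infer_library_from_model_or_model_class` is the private helper this diff relies on:

from optimum.exporters.tasks import TasksManager

def resolve_library_name(model):
    # Unwrap optimum wrappers before inferring the source library
    # (transformers, diffusers, timm, sentence_transformers, ...).
    if model.__class__.__module__.startswith("optimum"):
        return TasksManager._infer_library_from_model_or_model_class(model=model.model)
    return TasksManager._infer_library_from_model_or_model_class(model=model)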
3 changes: 1 addition & 2 deletions optimum/exporters/openvino/model_configs.py
@@ -49,7 +49,6 @@
ChatGLMModelPatcher,
CodeGenModelPatcher,
DBRXModelPatcher,
GemmaModelPatcher,
InternLM2Patcher,
InternLMModelPatcher,
JaisModelPatcher,
@@ -319,7 +318,7 @@ class GemmaOpenVINOConfig(GemmaOnnxConfig):
def patch_model_for_export(
self, model: Union["PreTrainedModel", "TFPreTrainedModel"], model_kwargs: Optional[Dict[str, Any]] = None
) -> "ModelPatcher":
return GemmaModelPatcher(self, model, model_kwargs=model_kwargs)
return LlamaModelPatcher(self, model, model_kwargs=model_kwargs)


@register_in_tasks_manager(
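With `GemmaModelPatcher` gone, Gemma reuses the shared `LlamaModelPatcher` defined in `model_patcher.py` (next file), since both models need the same causal-mask and rotary-embedding fixes. Patchers are context managers, as their `__enter__`/`__exit__` methods below show; a hedged sketch of how one wraps conversion (not the exact call site):

from typing import Any, Dict, Optional

def convert_with_patching(config, model, model_kwargs: Optional[Dict[str, Any]] = None):
    # patch_model_for_export returns a ModelPatcher; entering it installs
    # the patched forwards, exiting restores the originals.
    with config.patch_model_for_export(model, model_kwargs=model_kwargs):
        ...  # trace/convert the patched model here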
63 changes: 39 additions & 24 deletions optimum/exporters/openvino/model_patcher.py
@@ -497,50 +497,65 @@ def _llama_gemma_update_causal_mask_latest(
_llama_gemma_update_causal_mask = _llama_gemma_update_causal_mask_legacy


class GemmaModelPatcher(DecoderModelPatcher):
def llama_gemma_rotary_emb_forward(self, x, position_ids, seq_len=None):
# adapted from https://github.com/huggingface/transformers/blob/main/src/transformers/models/gemma/modeling_gemma.py#L104
_seq_len = torch.max(position_ids) + 1 if seq_len is None else seq_len
if _seq_len > self.embed_positions.shape[0]:
if seq_len is None:
return self._orig_forward(x, position_ids)
else:
return self._orig_forward(x, position_ids, seq_len)
sincos = self.embed_positions[position_ids]
sin, cos = torch.split(sincos, sincos.shape[-1] // 2, dim=-1)
return cos, sin


class LlamaModelPatcher(DecoderModelPatcher):
def __enter__(self):
super().__enter__()

# gemma has some accuracy issues with bf16 with transformers >= 4.39
# llama/gemma has some accuracy issues with bf16 with transformers >= 4.39
# fill causal mask in a slightly different way to avoid overflow on some platforms
if is_transformers_version(">=", "4.39.0"):
self._model.model._orig_update_causal_mask = self._model.model._update_causal_mask
self._model.model._update_causal_mask = types.MethodType(
_llama_gemma_update_causal_mask, self._model.model
)

# init inv_freq for torchscript tracing
# https://github.com/huggingface/transformers/blob/ed74d97871468f3a4695ede50abdc0b55717a84d/src/transformers/models/gemma/modeling_gemma.py#L108
for layer in self._model.model.layers:
if layer.self_attn.rotary_emb.inv_freq is None:
rotary_emb = layer.self_attn.rotary_emb
layer.self_attn.rotary_emb.inv_freq = 1.0 / (
rotary_emb.base ** (torch.arange(0, rotary_emb.dim, 2, dtype=torch.int64).float() / rotary_emb.dim)
)
max_positions = self._model.config.max_position_embeddings

def __exit__(self, exc_type, exc_value, traceback):
super().__exit__(exc_type, exc_value, traceback)
if hasattr(self._model.model, "_orig_update_causal_mask"):
self._model.model._update_causal_mask = self._model.model._orig_update_causal_mask
# cos/sin for rotary position embeddings also have bf16 accuracy and efficiency issues, since they are recalculated at every step
# use precomputed values instead
def create_sinusoidal_positions(num_pos: int, dim: int, base: int = 10000) -> torch.Tensor:
# adapted from https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/modeling_llama.py#L101
inv_freq = 1.0 / (base ** (torch.arange(0, dim, 2, dtype=torch.int64) / dim))

sinusoid_inp = torch.einsum(
"i , j -> i j", torch.arange(num_pos, dtype=torch.int64).float(), inv_freq
).float()
emb = torch.cat((sinusoid_inp, sinusoid_inp), dim=-1)
return torch.cat((torch.sin(emb), torch.cos(emb)), dim=1)

class LlamaModelPatcher(DecoderModelPatcher):
def __enter__(self):
super().__enter__()
base = self._model.model.layers[0].self_attn.rotary_emb.base
dim = self._model.model.layers[0].self_attn.rotary_emb.dim
embed_positions = create_sinusoidal_positions(max_positions, dim, base)

# llama has some accuracy issues with bf16 with transformers >= 4.39
# fill causal mask in a slightly different way to avoid overflow on some platforms
if is_transformers_version(">=", "4.39.0"):
self._model.model._orig_update_causal_mask = self._model.model._update_causal_mask
self._model.model._update_causal_mask = types.MethodType(
_llama_gemma_update_causal_mask, self._model.model
)
for layer in self._model.model.layers:
layer.self_attn.rotary_emb.register_buffer("embed_positions", embed_positions)
layer.self_attn.rotary_emb._orig_forward = layer.self_attn.rotary_emb.forward

layer.self_attn.rotary_emb.forward = types.MethodType(
llama_gemma_rotary_emb_forward, layer.self_attn.rotary_emb
)

def __exit__(self, exc_type, exc_value, traceback):
super().__exit__(exc_type, exc_value, traceback)
if hasattr(self._model.model, "_orig_update_causal_mask"):
self._model.model._update_causal_mask = self._model.model._orig_update_causal_mask

for layer in self._model.model.layers:
layer.self_attn.rotary_emb.forward = layer.self_attn.rotary_emb._orig_forward


SUPPORT_SDPA = is_torch_version(">", "2.1.0")

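The unified patcher replaces per-step rotary cos/sin computation with a precomputed sinusoidal table that the patched `forward` simply indexes with `position_ids`. A standalone sketch of the table construction and lookup, with tiny illustrative sizes:

import torch

def create_sinusoidal_positions(num_pos: int, dim: int, base: int = 10000) -> torch.Tensor:
    # Same construction as in the patcher above: per-position frequencies,
    # duplicated, then sin and cos halves concatenated on the last axis.
    inv_freq = 1.0 / (base ** (torch.arange(0, dim, 2, dtype=torch.int64) / dim))
    sinusoid_inp = torch.einsum("i , j -> i j", torch.arange(num_pos, dtype=torch.int64).float(), inv_freq).float()
    emb = torch.cat((sinusoid_inp, sinusoid_inp), dim=-1)
    return torch.cat((torch.sin(emb), torch.cos(emb)), dim=1)

table = create_sinusoidal_positions(num_pos=16, dim=8)  # shape (16, 16)
position_ids = torch.tensor([[0, 1, 2]])
sincos = table[position_ids]  # (1, 3, 16), as in llama_gemma_rotary_emb_forward
sin, cos = torch.split(sincos, sincos.shape[-1] // 2, dim=-1)
print(sin.shape, cos.shape)  # torch.Size([1, 3, 8]) each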
