Enable quant model support #1074

Open · wants to merge 30 commits into base: main

Commits (30)
3888824  enable IPEXModelForSeq2SeqLM (jiqing-feng, Dec 9, 2024)
f9fa807  set static cache (jiqing-feng, Dec 9, 2024)
202df43  add tests for IPEXModelForSeq2SeqLM (jiqing-feng, Dec 9, 2024)
4488073  add docs (jiqing-feng, Dec 9, 2024)
16fecf8  fix readme (jiqing-feng, Dec 9, 2024)
de501f4  Merge branch 'main' into text2text (jiqing-feng, Dec 10, 2024)
4225bf0  refactor compile (jiqing-feng, Dec 11, 2024)
2ac7ecf  fix check (jiqing-feng, Dec 11, 2024)
24b988c  fix ruff check (jiqing-feng, Dec 11, 2024)
5c4f9a1  Merge branch 'huggingface:main' into text2text (jiqing-feng, Dec 16, 2024)
46b93a4  enable quantized model (jiqing-feng, Dec 16, 2024)
82d39ce  add bnb test (jiqing-feng, Dec 16, 2024)
7dc08da  add bnb tests in yaml (jiqing-feng, Dec 16, 2024)
30027ff  fix tests (jiqing-feng, Dec 16, 2024)
314db04  disable bnb tests (jiqing-feng, Dec 16, 2024)
87656ca  fix gpt2 (jiqing-feng, Dec 16, 2024)
9a7e931  Merge branch 'main' into quant (jiqing-feng, Dec 18, 2024)
b0cec9c  set actual device (jiqing-feng, Dec 18, 2024)
94cf35d  assign device when convert class (jiqing-feng, Dec 18, 2024)
9af46d1  fix class init (jiqing-feng, Dec 18, 2024)
18b2a6a  fix ipex attn init (jiqing-feng, Dec 18, 2024)
9f6db33  rm set device on config (jiqing-feng, Dec 18, 2024)
6d8a969  fix format (jiqing-feng, Dec 18, 2024)
dd811f9  fix mlp class init (jiqing-feng, Dec 18, 2024)
d91eefb  Merge branch 'huggingface:main' into quant (jiqing-feng, Jan 14, 2025)
f094cad  Merge branch 'main' into quant (jiqing-feng, Jan 21, 2025)
dab4a78  add use_cache param when init generation config (jiqing-feng, Jan 21, 2025)
6bf3b8b  fix gpt2 quant model (jiqing-feng, Jan 21, 2025)
356d51d  fix falcon linear fusion (jiqing-feng, Jan 22, 2025)
d1eee87  fix falcon (jiqing-feng, Jan 22, 2025)
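
Taken together, the commits enable loading checkpoints that are already quantized through the IPEX model classes (the bnb commits cover bitsandbytes). A rough usage sketch, assuming `IPEXModelForCausalLM.from_pretrained` forwards `quantization_config` to transformers the way other optimum-intel entry points do; the model id and generation arguments are illustrative, not taken from this PR:

```python
from transformers import AutoTokenizer, BitsAndBytesConfig
from optimum.intel import IPEXModelForCausalLM

# Illustrative only: load a 4-bit bitsandbytes-quantized model through IPEX.
quantization_config = BitsAndBytesConfig(load_in_4bit=True)
model = IPEXModelForCausalLM.from_pretrained(
    "gpt2",  # placeholder model id
    quantization_config=quantization_config,
)
tokenizer = AutoTokenizer.from_pretrained("gpt2")
inputs = tokenizer("Hello, my name is", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=16)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```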
optimum/exporters/ipex/model_patcher.py (25 changes: 11 additions & 14 deletions)
@@ -14,7 +14,7 @@
 
 from transformers.models.bert.modeling_bert import BertIntermediate
 from transformers.models.falcon.modeling_falcon import FalconDecoderLayer, FalconModel
-from transformers.models.gpt2.modeling_gpt2 import GPT2MLP, GPT2Attention, GPT2Block, GPT2Model
+from transformers.models.gpt2.modeling_gpt2 import GPT2Block, GPT2Model
 from transformers.models.llama.modeling_llama import (
     LlamaDecoderLayer,
     LlamaModel,
@@ -27,13 +27,11 @@
 
 from .modeling_utils import (
     _IPEX_MINIMUM_VERSION_FOR_PATCHING,
-    _IPEXGPT2MLP,
     _falcon_model_forward,
-    _gpt2_block_forward,
     _gpt2_model_forward,
     _ipex_rms_layer_norm_forward,
     _IPEXFalconDecoderLayer,
-    _IPEXGPT2Attention,
+    _IPEXGPT2Block,
     _IPEXIntermediate,
     _IPEXLlamaDecoderLayer,
     _llama_model_forward,
@@ -59,12 +57,12 @@ def convert_functions(m, target_m, new_function_name, new_function):
         convert_functions(sub_m, target_m, new_function_name, new_function)
 
 
-def convert_class(m, target_m, new_class, config=None):
+def convert_class(m, target_m, new_class, device, config):
     for name, sub_m in m.named_children():
         if isinstance(sub_m, target_m):
-            new_m = new_class(sub_m, config)
+            new_m = new_class(sub_m, device, config)
             setattr(m, name, new_m)
-        convert_class(sub_m, target_m, new_class, config)
+        convert_class(sub_m, target_m, new_class, device, config)
 
 
 def patch_op(m, target_m, new_op_name, new_op):
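
The `convert_class` change is the core of the device plumbing in this PR: replacement classes now receive an explicit `device` instead of an optional `config` (see the "assign device when convert class" commit). A minimal runnable sketch of how this recursive swap behaves; `ToyBlock` and `WrappedBlock` are hypothetical stand-ins, not classes from this repo:

```python
import torch
from torch import nn


class ToyBlock(nn.Module):
    """Hypothetical module standing in for a decoder layer to be patched."""

    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(4, 4)

    def forward(self, x):
        return self.linear(x)


class WrappedBlock(nn.Module):
    """Hypothetical replacement mirroring the new_class(sub_m, device, config) call."""

    def __init__(self, module, device, config):
        super().__init__()
        self.module = module.to(device)  # placement comes from the explicit device
        self.config = config

    def forward(self, x):
        return self.module(x)


# Same logic as the patched convert_class: walk children, swap matches, recurse.
def convert_class(m, target_m, new_class, device, config):
    for name, sub_m in m.named_children():
        if isinstance(sub_m, target_m):
            new_m = new_class(sub_m, device, config)
            setattr(m, name, new_m)
        convert_class(sub_m, target_m, new_class, device, config)


model = nn.Sequential(ToyBlock(), nn.ReLU(), ToyBlock())
convert_class(model, ToyBlock, WrappedBlock, torch.device("cpu"), config=None)
print(model)  # children 0 and 2 are now WrappedBlock instances
```

Passing `model.device` at the call sites matters for quantized checkpoints, where the correct placement cannot be inferred from the config alone.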
@@ -82,7 +80,7 @@ def _patch_llama_model(model):
     """
     convert_functions(model, LlamaModel, "forward", _llama_model_forward)
     convert_functions(model, LlamaRMSNorm, "forward", _ipex_rms_layer_norm_forward)
-    convert_class(model, LlamaDecoderLayer, _IPEXLlamaDecoderLayer, model.config)
+    convert_class(model, LlamaDecoderLayer, _IPEXLlamaDecoderLayer, model.device, model.config)
     return model


@@ -98,21 +96,20 @@ def _patch_falcon_model(model):
     setattr(model.config, "num_key_value_heads", num_key_value_heads)
     convert_functions(model, FalconModel, "forward", _falcon_model_forward)
     replace_customized_linear_with_linear(model)
-    convert_class(model, FalconDecoderLayer, _IPEXFalconDecoderLayer, model.config)
+    convert_class(model, FalconDecoderLayer, _IPEXFalconDecoderLayer, model.device, model.config)
     return model
 
 
 def _patch_gpt2_model(model):
     """
     Patch gpt2 model:
     1. Use IPEX paged attention
+    2. Linear fusion with (Linear + Add)
     """
     num_key_value_heads = model.config.num_attention_heads
     setattr(model.config, "num_key_value_heads", num_key_value_heads)
     convert_functions(model, GPT2Model, "forward", _gpt2_model_forward)
-    convert_functions(model, GPT2Block, "forward", _gpt2_block_forward)
-    convert_class(model, GPT2Attention, _IPEXGPT2Attention, model.config)
-    convert_class(model, GPT2MLP, _IPEXGPT2MLP, model.config)
+    convert_class(model, GPT2Block, _IPEXGPT2Block, model.device, model.config)
     return model
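
For background, "Linear fusion with (Linear + Add)" folds the residual addition into the linear kernel, so one op computes `x @ W.T + b + residual`. A plain-PyTorch sketch of the idea; this `LinearAdd` is a hypothetical illustration, not the IPEX fused implementation:

```python
import torch
from torch import nn


class LinearAdd(nn.Module):
    """Hypothetical fused (Linear + residual Add), written as two plain ops."""

    def __init__(self, linear: nn.Linear):
        super().__init__()
        self.weight = linear.weight
        self.bias = linear.bias

    def forward(self, x: torch.Tensor, residual: torch.Tensor) -> torch.Tensor:
        # A real fused kernel performs this in a single pass over memory.
        return nn.functional.linear(x, self.weight, self.bias) + residual


proj = nn.Linear(8, 8)
fused = LinearAdd(proj)
x, residual = torch.randn(2, 8), torch.randn(2, 8)
torch.testing.assert_close(fused(x, residual), proj(x) + residual)
```

Consolidating the separate `GPT2Attention`/`GPT2MLP` wrappers into a single `_IPEXGPT2Block` keeps that fusion decision in one place per block.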


@@ -121,7 +118,7 @@ def _patch_bert_model(model):
     Patch bert model:
     1. Linear fusion with Linear + Gelu
     """
-    convert_class(model, BertIntermediate, _IPEXIntermediate)
+    convert_class(model, BertIntermediate, _IPEXIntermediate, model.device, model.config)
     return model


@@ -130,7 +127,7 @@ def _patch_vit_model(model):
     Patch vit model:
     1. Linear fusion with Linear + Gelu
     """
-    convert_class(model, ViTIntermediate, _IPEXIntermediate)
+    convert_class(model, ViTIntermediate, _IPEXIntermediate, model.device, model.config)
     return model
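
Note that `_IPEXIntermediate` was previously constructed from the module alone; after this change every wrapper class is built with the same `(module, device, config)` triple. A short sketch of that uniform constructor shape; `_ToyIntermediate` is a hypothetical illustration of the convention, not the repo's class:

```python
from torch import nn


class _ToyIntermediate(nn.Module):
    """Hypothetical wrapper with the uniform (module, device, config) constructor,
    conceptually covering the Linear + Gelu fusion named in the docstrings."""

    def __init__(self, module, device, config):
        super().__init__()
        self.dense = module.dense  # BertIntermediate and ViTIntermediate both expose .dense
        self.device = device  # accepted for uniformity even if this wrapper never uses it
        self.config = config

    def forward(self, hidden_states):
        # A real fused kernel would run the linear and the gelu in one pass.
        return nn.functional.gelu(self.dense(hidden_states))
```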

