fix falcon linear fusion
Signed-off-by: jiqing-feng <[email protected]>
jiqing-feng committed Jan 22, 2025
1 parent 6bf3b8b commit 356d51d
Showing 2 changed files with 5 additions and 121 deletions.
6 changes: 5 additions & 1 deletion optimum/exporters/ipex/modeling_utils.py
@@ -869,7 +869,11 @@ def forward(
         residual: torch.Tensor = None,
         **kwargs,
     ):
-        mlp_hidden_states = self.linear_gelu(hidden_states)
+        if hasattr(self, "linear_gelu"):
+            mlp_hidden_states = self.linear_gelu(hidden_states)
+        else:
+            mlp_hidden_states = self.act(self.dense_h_to_4h(hidden_states))
 
         if hasattr(self, "linear_add_add"):
            output = self.linear_add_add(mlp_hidden_states, attention_output, residual)
         else:
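
Note: the guard above means the unfused Falcon MLP path (self.act(self.dense_h_to_4h(hidden_states))) is taken whenever no fused linear_gelu module was attached to the layer. A minimal runnable sketch of this guarded-dispatch pattern, for context; the attribute names follow the diff, but the "fused" module here is a plain nn.Sequential stand-in rather than the actual IPEX fused linear+GELU kernel:

    import torch
    from torch import nn

    class FalconMLPSketch(nn.Module):
        # Sketch of the guarded dispatch: dense_h_to_4h / act / linear_gelu
        # follow the diff; fusion itself is simulated with nn.Sequential.
        def __init__(self, hidden_size: int, fuse: bool = True):
            super().__init__()
            self.dense_h_to_4h = nn.Linear(hidden_size, 4 * hidden_size)
            self.act = nn.GELU()
            if fuse:
                # Only attached when fusion is applied to this layer.
                self.linear_gelu = nn.Sequential(self.dense_h_to_4h, self.act)

        def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
            if hasattr(self, "linear_gelu"):
                return self.linear_gelu(hidden_states)
            # Fallback when fusion was never applied to this layer.
            return self.act(self.dense_h_to_4h(hidden_states))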
120 changes: 0 additions & 120 deletions tests/ipex/test_modeling.py
@@ -130,40 +130,6 @@ def test_pipeline(self, model_arch):
         _ = pipe(text)
         self.assertEqual(pipe.device, model.device)
 
-    @parameterized.expand(SUPPORTED_ARCHITECTURES)
-    @unittest.skipIf(not is_bitsandbytes_available(), reason="Test requires bitsandbytes")
-    def test_bnb(self, model_arch):
-        model_id = MODEL_NAMES[model_arch]
-        quantization_config = BitsAndBytesConfig(load_in_8bit=True)
-        ipex_model = self.IPEX_MODEL_CLASS.from_pretrained(
-            model_id, device_map=DEVICE, quantization_config=quantization_config
-        )
-        transformers_model = self.IPEX_MODEL_CLASS.auto_model_class.from_pretrained(
-            model_id, device_map=DEVICE, quantization_config=quantization_config
-        )
-        tokenizer = AutoTokenizer.from_pretrained(model_id)
-        inputs = "This is a sample input"
-        tokens = tokenizer(inputs, return_tensors="pt").to(DEVICE)
-        with torch.no_grad():
-            transformers_outputs = transformers_model(**tokens)
-        outputs = ipex_model(**tokens)
-
-        # Test re-load model
-        with tempfile.TemporaryDirectory() as tmpdirname:
-            ipex_model.save_pretrained(tmpdirname)
-            loaded_model = self.IPEX_MODEL_CLASS.from_pretrained(tmpdirname, device_map=DEVICE)
-            loaded_model_outputs = loaded_model(**tokens)
-        # Test init method
-        init_model = self.IPEX_MODEL_CLASS(transformers_model)
-        init_model_outputs = init_model(**tokens)
-
-        # Compare tensor outputs
-        for output_name in {"logits", "last_hidden_state"}:
-            if output_name in transformers_outputs:
-                self.assertTrue(torch.allclose(outputs[output_name], transformers_outputs[output_name], atol=1e-3))
-                self.assertTrue(torch.allclose(outputs[output_name], loaded_model_outputs[output_name]))
-                self.assertTrue(torch.allclose(outputs[output_name], init_model_outputs[output_name]))
-
 
 class IPEXModelForSequenceClassificationTest(IPEXModelTest):
     IPEX_MODEL_CLASS = IPEXModelForSequenceClassification
@@ -248,46 +214,6 @@ def test_patched_model(self):
         self.assertTrue(torch.allclose(outputs.start_logits, transformers_outputs.start_logits, atol=1e-4))
         self.assertTrue(torch.allclose(outputs.end_logits, transformers_outputs.end_logits, atol=1e-4))
 
-    @parameterized.expand(SUPPORTED_ARCHITECTURES)
-    @unittest.skipIf(not is_bitsandbytes_available(), reason="Test requires bitsandbytes")
-    def test_bnb(self, model_arch):
-        model_id = MODEL_NAMES[model_arch]
-        set_seed(SEED)
-        quantization_config = BitsAndBytesConfig(load_in_8bit=True)
-        ipex_model = IPEXModelForQuestionAnswering.from_pretrained(
-            model_id, device_map=DEVICE, quantization_config=quantization_config
-        )
-        self.assertIsInstance(ipex_model.config, PretrainedConfig)
-        transformers_model = AutoModelForQuestionAnswering.from_pretrained(
-            model_id, device_map=DEVICE, quantization_config=quantization_config
-        )
-        tokenizer = AutoTokenizer.from_pretrained(model_id)
-        inputs = "This is a sample input"
-        tokens = tokenizer(inputs, return_tensors="pt").to(DEVICE)
-        with torch.no_grad():
-            transformers_outputs = transformers_model(**tokens)
-        outputs = ipex_model(**tokens)
-
-        # Test re-load model
-        with tempfile.TemporaryDirectory() as tmpdirname:
-            ipex_model.save_pretrained(tmpdirname)
-            loaded_model = self.IPEX_MODEL_CLASS.from_pretrained(tmpdirname, device_map=DEVICE)
-            loaded_model_outputs = loaded_model(**tokens)
-
-        # Test init method
-        init_model = self.IPEX_MODEL_CLASS(transformers_model)
-        init_model_outputs = init_model(**tokens)
-
-        self.assertIn("start_logits", outputs)
-        self.assertIn("end_logits", outputs)
-        # Compare tensor outputs
-        self.assertTrue(torch.allclose(outputs.start_logits, transformers_outputs.start_logits, atol=1e-4))
-        self.assertTrue(torch.allclose(outputs.end_logits, transformers_outputs.end_logits, atol=1e-4))
-        self.assertTrue(torch.equal(outputs.start_logits, loaded_model_outputs.start_logits))
-        self.assertTrue(torch.equal(outputs.end_logits, loaded_model_outputs.end_logits))
-        self.assertTrue(torch.equal(outputs.start_logits, init_model_outputs.start_logits))
-        self.assertTrue(torch.equal(outputs.end_logits, init_model_outputs.end_logits))
-
 
 class IPEXModelForCausalLMTest(unittest.TestCase):
     IPEX_MODEL_CLASS = IPEXModelForCausalLM
@@ -799,52 +725,6 @@ def test_ipex_beam_search(self, test_name, model_arch, use_cache):
         self.assertIsInstance(outputs, torch.Tensor)
         self.assertTrue(torch.equal(outputs, transformers_outputs))
 
-    @parameterized.expand(SUPPORTED_ARCHITECTURES)
-    @unittest.skipIf(not is_bitsandbytes_available(), reason="Test requires bitsandbytes")
-    def test_bnb(self, model_arch):
-        model_id = MODEL_NAMES[model_arch]
-        set_seed(SEED)
-        dtype = torch.float16 if IS_XPU_AVAILABLE else torch.float32
-        quantization_config = BitsAndBytesConfig(load_in_8bit=True)
-        # Test model forward do not need cache.
-        ipex_model = self.IPEX_MODEL_CLASS.from_pretrained(
-            model_id, torch_dtype=dtype, quantization_config=quantization_config
-        )
-        transformers_model = AutoModelForSeq2SeqLM.from_pretrained(
-            model_id, torch_dtype=dtype, quantization_config=quantization_config
-        )
-        self.assertIsInstance(ipex_model.config, PretrainedConfig)
-        tokenizer = AutoTokenizer.from_pretrained(model_id)
-        tokens = tokenizer(
-            "This is a sample",
-            return_tensors="pt",
-            return_token_type_ids=False if model_arch in ("llama", "llama2") else None,
-        )
-        decoder_start_token_id = transformers_model.config.decoder_start_token_id if model_arch != "mbart" else 2
-        decoder_inputs = {"decoder_input_ids": torch.ones((1, 1), dtype=torch.long) * decoder_start_token_id}
-        outputs = ipex_model(**tokens, **decoder_inputs)
-
-        self.assertIsInstance(outputs.logits, torch.Tensor)
-
-        with torch.no_grad():
-            transformers_outputs = transformers_model(**tokens, **decoder_inputs)
-
-        # Test re-load model
-        with tempfile.TemporaryDirectory() as tmpdirname:
-            ipex_model.save_pretrained(tmpdirname)
-            loaded_model = self.IPEX_MODEL_CLASS.from_pretrained(tmpdirname, torch_dtype=dtype)
-            loaded_model_outputs = loaded_model(**tokens, **decoder_inputs)
-
-        # Test init method
-        init_model = self.IPEX_MODEL_CLASS(transformers_model)
-        init_model_outputs = init_model(**tokens, **decoder_inputs)
-
-        # Compare tensor outputs
-        self.assertTrue(torch.allclose(outputs.logits, transformers_outputs.logits, atol=1e-4))
-        # To avoid float pointing error
-        self.assertTrue(torch.allclose(outputs.logits, loaded_model_outputs.logits, atol=1e-7))
-        self.assertTrue(torch.allclose(outputs.logits, init_model_outputs.logits, atol=1e-7))
-
 
 class IPEXSTModel(unittest.TestCase):
     SUPPORTED_ARCHITECTURES = (