Qwen/Qwen2.5-7B-Instruct: FileNotFoundError: Cannot determine framework from given checkpoint location. There should be a pytorch_model*.bin for PyTorch or tf_model*.h5 for TensorFlow.
#1160
Open
hteeyeoh opened this issue Feb 13, 2025 · 0 comments
I used the 'optimum-cli export openvino' command to convert the Qwen2.5 model to OpenVINO IR, then initialized it on the iGPU with the Python code below (the export command is reproduced after the snippet for reference):
HuggingFacePipeline.from_model_id(
    model_id="/tmp/model_cache/Qwen/Qwen2.5-7B-Instruct",
    task="text-generation",
    backend="openvino",
    model_kwargs={
        "device": config.LLM_INFERENCE_DEVICE,
        "ov_config": {
            "PERFORMANCE_HINT": "LATENCY",
            "NUM_STREAMS": "1",
            "CACHE_DIR": "/tmp/model_cache/Qwen/Qwen2.5-7B-Instruct/model_cache",
        },
        "trust_remote_code": True,
    },
    pipeline_kwargs={"max_new_tokens": config.MAX_TOKENS},
)
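For reference, the export step was roughly the following (a sketch; the exact flags may have differed from what I actually ran):

optimum-cli export openvino \
    --model Qwen/Qwen2.5-7B-Instruct \
    --task text-generation-with-past \
    --trust-remote-code \
    /tmp/model_cache/Qwen/Qwen2.5-7B-Instruct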
But it runs into the error below:
| The model /tmp/model_cache/Qwen/Qwen2.5-7B-Instruct was already converted to the OpenVINO IR but got export=True, the model will be converted to OpenVINO once again. Don't forget to save the resulting model with .save_pretrained()
| Traceback (most recent call last):
| File "/usr/local/lib/python3.11/site-packages/langchain_huggingface/llms/huggingface_pipeline.py", line 145, in from_model_id
| model = OVModelForCausalLM.from_pretrained(
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
| File "/usr/local/lib/python3.11/site-packages/optimum/intel/openvino/modeling_base.py", line 469, in from_pretrained
| return super().from_pretrained(
| ^^^^^^^^^^^^^^^^^^^^^^^^
| File "/usr/local/lib/python3.11/site-packages/optimum/modeling_base.py", line 438, in from_pretrained
| return from_pretrained_method(
| ^^^^^^^^^^^^^^^^^^^^^^^
| File "/usr/local/lib/python3.11/site-packages/optimum/intel/openvino/modeling_decoder.py", line 865, in _from_pretrained
| causal_model = init_cls(
| ^^^^^^^^^
| File "/usr/local/lib/python3.11/site-packages/optimum/intel/openvino/modeling_decoder.py", line 194, in init
| self.compile()
| File "/usr/local/lib/python3.11/site-packages/optimum/intel/openvino/modeling_decoder.py", line 400, in compile
| super().compile()
| File "/usr/local/lib/python3.11/site-packages/optimum/intel/openvino/modeling_base.py", line 671, in compile
| self.request = self._compile_model(self.model, self._device, ov_config, self.model_save_dir)
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
| File "/usr/local/lib/python3.11/site-packages/optimum/intel/openvino/modeling_base.py", line 280, in _compile_model
| compiled_model = core.compile_model(model, device.upper() if device is not None else device, config=ov_config)
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
| File "/usr/local/lib/python3.11/site-packages/openvino/_ov_api.py", line 597, in compile_model
| super().compile_model(model, device_name, {} if config is None else config),
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
| RuntimeError: Exception from src/inference/src/cpp/core.cpp:109:
| Exception from src/inference/src/dev/plugin.cpp:53:
| Check 'false' failed at src/plugins/intel_gpu/src/plugin/program_builder.cpp:191:
| [GPU] ProgramBuilder build failed!
| Exception from src/plugins/intel_gpu/src/runtime/ocl/ocl_stream.cpp:372:
| [GPU] clFinish, error code: -5
|
| During handling of the above exception, another exception occurred:
|
| Traceback (most recent call last):
| File "/usr/local/bin/uvicorn", line 8, in
| sys.exit(main())
| ^^^^^^
| File "/usr/local/lib/python3.11/site-packages/click/core.py", line 1161, in call
| return self.main(*args, **kwargs)
| ^^^^^^^^^^^^^^^^^^^^^^^^^^
| File "/usr/local/lib/python3.11/site-packages/click/core.py", line 1082, in main
| rv = self.invoke(ctx)
| ^^^^^^^^^^^^^^^^
| File "/usr/local/lib/python3.11/site-packages/click/core.py", line 1443, in invoke
| return ctx.invoke(self.callback, **ctx.params)
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
| File "/usr/local/lib/python3.11/site-packages/click/core.py", line 788, in invoke
| return __callback(*args, **kwargs)
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^
| File "/usr/local/lib/python3.11/site-packages/uvicorn/main.py", line 412, in main
| run(
| File "/usr/local/lib/python3.11/site-packages/uvicorn/main.py", line 579, in run
| server.run()
| File "/usr/local/lib/python3.11/site-packages/uvicorn/server.py", line 66, in run
| return asyncio.run(self.serve(sockets=sockets))
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
| File "/usr/local/lib/python3.11/asyncio/runners.py", line 190, in run
| return runner.run(main)
| ^^^^^^^^^^^^^^^^
| File "/usr/local/lib/python3.11/asyncio/runners.py", line 118, in run
| return self._loop.run_until_complete(task)
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
| File "/usr/local/lib/python3.11/asyncio/base_events.py", line 654, in run_until_complete
| return future.result()
| ^^^^^^^^^^^^^^^
| File "/usr/local/lib/python3.11/site-packages/uvicorn/server.py", line 70, in serve
| await self._serve(sockets)
| File "/usr/local/lib/python3.11/site-packages/uvicorn/server.py", line 77, in _serve
| config.load()
| File "/usr/local/lib/python3.11/site-packages/uvicorn/config.py", line 435, in load
| self.loaded_app = import_from_string(self.app)
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
| File "/usr/local/lib/python3.11/site-packages/uvicorn/importer.py", line 19, in import_from_string
| module = importlib.import_module(module_str)
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
| File "/usr/local/lib/python3.11/importlib/init.py", line 126, in import_module
| return _bootstrap._gcd_import(name[level:], package, level)
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
| File "", line 1204, in _gcd_import
| File "", line 1176, in _find_and_load
| File "", line 1147, in _find_and_load_unlocked
| File "", line 690, in _load_unlocked
| File "", line 940, in exec_module
| File "", line 241, in _call_with_frames_removed
| File "/my-app/app/server.py", line 12, in
| from .chain import (
| File "/my-app/app/chain.py", line 53, in
| llm = HuggingFacePipeline.from_model_id(
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
| File "/usr/local/lib/python3.11/site-packages/langchain_huggingface/llms/huggingface_pipeline.py", line 151, in from_model_id
| model = OVModelForCausalLM.from_pretrained(
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
| File "/usr/local/lib/python3.11/site-packages/optimum/intel/openvino/modeling_base.py", line 469, in from_pretrained
| return super().from_pretrained(
| ^^^^^^^^^^^^^^^^^^^^^^^^
| File "/usr/local/lib/python3.11/site-packages/optimum/modeling_base.py", line 438, in from_pretrained
| return from_pretrained_method(
| ^^^^^^^^^^^^^^^^^^^^^^^
| File "/usr/local/lib/python3.11/site-packages/optimum/intel/openvino/modeling_decoder.py", line 313, in _from_transformers
| main_export(
| File "/usr/local/lib/python3.11/site-packages/optimum/exporters/openvino/main.py", line 204, in main_export
| framework = TasksManager.determine_framework(
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
| File "/usr/local/lib/python3.11/site-packages/optimum/exporters/tasks.py", line 1562, in determine_framework
| raise FileNotFoundError(
| FileNotFoundError: Cannot determine framework from given checkpoint location. There should be a pytorch_model.bin for PyTorch or tf_model.h5 for TensorFlow.
The same code seems to work fine with the "Intel/neural-chat-7b-v3-3" model.
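For what it's worth, my reading of the two tracebacks: the first attempt loads the already-exported IR and fails while compiling it for the GPU (OpenCL error code -5 is CL_OUT_OF_RESOURCES), and the fallback then retries with export=True, which raises the FileNotFoundError because the IR directory only contains openvino_model.xml/.bin, not the original pytorch_model*.bin. A minimal sketch that reproduces just the GPU compile step, bypassing langchain (paths and ov_config mirror my setup above; purely diagnostic, not a fix):

from optimum.intel import OVModelForCausalLM

# Load the already-exported IR directly; compiling for "GPU" here should hit
# the same ProgramBuilder/clFinish failure if the iGPU compile is the problem.
model = OVModelForCausalLM.from_pretrained(
    "/tmp/model_cache/Qwen/Qwen2.5-7B-Instruct",
    device="GPU",  # i.e. config.LLM_INFERENCE_DEVICE in the snippet above
    ov_config={
        "PERFORMANCE_HINT": "LATENCY",
        "NUM_STREAMS": "1",
        "CACHE_DIR": "/tmp/model_cache/Qwen/Qwen2.5-7B-Instruct/model_cache",
    },
)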