huggingface · echarlaix · Jan 20, 2025 · Jan 14, 2025 · Jan 16, 2025 · Jan 16, 2025
diff --git a/.github/workflows/test_ipex.yml b/.github/workflows/test_ipex.yml
@@ -18,7 +18,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        transformers-version: ["4.46.0", "4.46.3"]
+        transformers-version: ["4.47.0", "4.47.1"]
         torch-version: ["2.4.0", "2.5.*"]
 
     runs-on: ubuntu-22.04

diff --git a/Dockerfile.ipex b/Dockerfile.ipex
@@ -43,7 +43,7 @@ ARG KMP_HW_SUBSET=1T
 ENV KMP_HW_SUBSET=${KMP_HW_SUBSET}
 ENV LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libtcmalloc.so"
 
-FROM intel/intel-extension-for-pytorch:2.3.110-xpu as xpu
+FROM intel/intel-extension-for-pytorch:2.5.10-xpu AS xpu
 WORKDIR /usr/src/
 
 RUN --mount=type=cache,id=apt-dev,target=/var/cache/apt \

diff --git a/optimum/exporters/ipex/cache_utils.py b/optimum/exporters/ipex/cache_utils.py
@@ -34,22 +34,23 @@ class IPEXPagedCache(Cache):
     def __init__(
         self,
         config: PretrainedConfig,
-        batch_size: int,
+        max_batch_size: int,
         max_cache_len: int,
         device,
         dtype=None,
         layer_device_map=None,
         **kwargs,
     ) -> None:
         super().__init__()
-        self.batch_size = batch_size
+        self.max_batch_size = max_batch_size
         # Used in `generate` to keep tally of how many tokens the cache has seen
-        self._seen_tokens = torch.zeros([batch_size], dtype=torch.int32, device=device)
+
+        self._seen_tokens = torch.zeros([max_batch_size], dtype=torch.int32, device=device)
         default_block_size = 16 if device.type == "cpu" else 64
         self.block_size = int(os.environ.get("OI_PAGED_ATTN_BLOCK_SIZE", str(default_block_size)))
-        self.num_blocks = (max_cache_len // self.block_size + (max_cache_len % self.block_size != 0)) * batch_size
+        self.num_blocks = (max_cache_len // self.block_size + (max_cache_len % self.block_size != 0)) * max_batch_size
         self.block_tables = -1 * torch.ones([self.num_blocks], dtype=torch.int32, device=device).reshape(
-            batch_size, -1
+            max_batch_size, -1
         )
         self.free_blocks = torch.ones([self.num_blocks], dtype=torch.int32, device=device)
         self.max_cache_len = max_cache_len
@@ -193,7 +194,7 @@ def get_max_length(self) -> Optional[int]:
 
     def reset(self):
         """Resets the cache values while preserving the objects"""
-        self._seen_tokens = torch.zeros([self.batch_size], dtype=torch.int32, device=self.block_tables.device)
+        self._seen_tokens = torch.zeros([self.max_batch_size], dtype=torch.int32, device=self.block_tables.device)
         self.block_tables.fill_(-1)
         self.free_blocks = torch.ones([self.num_blocks], dtype=torch.int32, device=self.block_tables.device)
         self.max_seq_len = 0

diff --git a/optimum/exporters/ipex/model_patcher.py b/optimum/exporters/ipex/model_patcher.py
@@ -41,8 +41,8 @@
 
 
 # Please also update in the setup.py and .github/workflows/test_ipex.yml if you change the transformers version
-_TRANSFORMERS_MIN_VERSION = "4.46.0"
-_TRANSFORMERS_MAX_VERSION = "4.46.99"
+_TRANSFORMERS_MIN_VERSION = "4.47.0"
+_TRANSFORMERS_MAX_VERSION = "4.47.99"
 
 _IPEX_EXPORTED_GENERATION_TASKS = ("text-generation",)
 

diff --git a/setup.py b/setup.py
@@ -66,7 +66,7 @@
     "nncf": ["nncf>=2.14.0"],
     "openvino": ["nncf>=2.14.0", "openvino>=2024.5.0", "openvino-tokenizers>=2024.5.0"],
     "neural-compressor": ["neural-compressor[pt]>3.0", "accelerate", "transformers<4.46"],
-    "ipex": ["intel-extension-for-pytorch>=2.4", "transformers>4.45,<4.47", "accelerate"],
+    "ipex": ["intel-extension-for-pytorch>=2.4", "transformers>=4.47,<4.48", "accelerate"],
     "diffusers": ["diffusers"],
     "quality": QUALITY_REQUIRE,
     "tests": TESTS_REQUIRE,