Fix rebase issues

Signed-off-by: southfreebird <[email protected]>
vllm-project · Feb 26, 2025 · dd71c5f
1 parent 845a47f
commit dd71c5f
Showing 1 changed file with 10 additions and 4 deletions.
diff --git a/vllm/model_executor/guided_decoding/xgrammar_decoding.py b/vllm/model_executor/guided_decoding/xgrammar_decoding.py
@@ -413,10 +413,18 @@ def clone(self) -> XGrammarLogitsProcessor:
         # Share the compiled grammar context (immutable after compilation)
         new_processor.ctx = self.ctx
 
-        # Create fresh matchers for the new sequence
+        # Create fresh matchers for the new sequence and reset
+        # num_processed_tokens for new sequence
         if self.ctx is not None:
+            max_rollback_tokens = (self.config.num_lookahead_slots
+                                   if self.config.num_lookahead_slots else 0)
             new_processor.matchers = [
-                xgr.GrammarMatcher(self.ctx) for _ in range(self.batch_size)
+                xgr.GrammarMatcher(self.ctx,
+                                   max_rollback_tokens=max_rollback_tokens)
+                for _ in range(self.batch_size)
+            ]
+            new_processor.num_processed_tokens = [
+                0 for _ in range(self.batch_size)
             ]
 
         # Create a new token bitmask with the same size
@@ -425,7 +433,5 @@ def clone(self) -> XGrammarLogitsProcessor:
 
         # Copy simple attributes
         new_processor.batch_size = self.batch_size
-        # Reset prefilled state for new sequence
-        new_processor.prefilled = False
 
         return new_processor