diff --git a/torch_xla/experimental/custom_kernel.py b/torch_xla/experimental/custom_kernel.py
index ed015f733748..5ce4f21a6f23 100644
--- a/torch_xla/experimental/custom_kernel.py
+++ b/torch_xla/experimental/custom_kernel.py
@@ -138,10 +138,12 @@ def trace_pallas(kernel: Callable,
       return trace_pallas_arg_to_payload[hash_key], tensor_args
 
   # Here we ignore the kwargs for execution as most of the time, the kwargs is only used in traced code.
+  os.environ['USE_SINGLE_SLICE'] = 'true'
   ir = jax.jit(
       kernel, static_argnums=static_argnums,
       static_argnames=static_argnames).lower(*jax_args, **kwargs).compiler_ir()
   payload = _extract_backend_config(ir)
+  os.environ.pop('USE_SINGLE_SLICE', None)
 
   if use_cache:
     # if we reach here it means we have a cache miss.