intel · xytintel · Nov 18, 2024 · Nov 15, 2024 · Nov 18, 2024 · Nov 18, 2024
diff --git a/src/ATen/native/xpu/sycl/LossCTCKernels.cpp b/src/ATen/native/xpu/sycl/LossCTCKernels.cpp
@@ -111,7 +111,6 @@ struct CTCLossLogAlphaKernelFunctor {
         have_three = false;
       }
       for (int64_t t = 1; t < max_input_length_; t++) {
-        item.barrier(sycl_local_fence);
         if ((t < input_length) && (s < 2 * target_length + 1)) {
           // only for valid t, s. This is equation (6) and (7), la1, la2, la3
           // are the three summands, lamax is the maximum for the logsumexp
@@ -161,7 +160,6 @@ struct CTCLossLogAlphaKernelFunctor {
         }
       }
     }
-    item.barrier(sycl_local_fence);
 
     // compute the loss (eq (8))
     if (tid_x == 0) {
@@ -490,7 +488,6 @@ struct CTCLossBackwardLogBetaKernelFunctor {
       // now go backward in t. Note that we need to skip the last timestep that
       // we did above.
       for (int64_t t = max_input_length_ - 2; t >= 0; t--) {
-        item.barrier(sycl_local_fence);
         if ((t < input_length - 1) && (s < 2 * target_length + 1)) {
           scalar_t lb1 = log_beta_data_
               [lb_batch_offset + lb_input_stride_ * (t + 1) +