Fix UFA非法地址访问 (UFA illegal address access) of case2: paddle.scatter (#50025)

* add dim check in scatter
* add check in scatter.cu
* add unittest
* remove unnecessary log and comment

---------

Co-authored-by: RedContritio <>
PaddlePaddle · Feb 10, 2023 · fb228c4 · fb228c4
1 parent e1a792f
commit fb228c4
Show file tree

Hide file tree

Showing 3 changed files with 66 additions and 18 deletions.
diff --git a/paddle/phi/kernels/funcs/scatter.cu.h b/paddle/phi/kernels/funcs/scatter.cu.h
@@ -28,19 +28,22 @@ namespace funcs {
 template <typename T, typename IndexT = int>
 __global__ void ScatterInitCUDAKernel(const IndexT* indices,
                                       T* output,
+                                      size_t output_count,
                                       size_t index_size,
                                       size_t slice_size) {
   CUDA_KERNEL_LOOP_TYPE(i, index_size * slice_size, int64_t) {
     int64_t indices_i = i / slice_size;
     int64_t slice_i = i - indices_i * slice_size;  // offset inside the slice
     IndexT scatter_i = indices[indices_i];
 
-    PADDLE_ENFORCE(scatter_i >= 0,
-                   "The index is out of bounds, "
-                   "please check whether the dimensions of index and "
-                   "input meet the requirements. It should "
-                   "be greater than or equal to 0, but received [%d]",
-                   scatter_i);
+    PADDLE_ENFORCE(
+        scatter_i >= 0 && scatter_i < output_count,
+        "The index is out of bounds, "
+        "please check whether the dimensions of index and "
+        "input meet the requirements. It should "
+        "be less than [%d] and greater or equal to 0, but received [%d]",
+        output_count,
+        scatter_i);
 
     int64_t out_i = scatter_i * slice_size + slice_i;
     *(output + out_i) = static_cast<T>(0);
@@ -51,6 +54,7 @@ template <typename T, typename IndexT = int>
 __global__ void ScatterCUDAKernel(const T* params,
                                   const IndexT* indices,
                                   T* output,
+                                  size_t output_count,
                                   size_t index_size,
                                   size_t slice_size,
                                   bool overwrite) {
@@ -59,12 +63,14 @@ __global__ void ScatterCUDAKernel(const T* params,
     int64_t slice_i = i - indices_i * slice_size;  // offset inside the slice
     IndexT scatter_i = indices[indices_i];
 
-    PADDLE_ENFORCE(scatter_i >= 0,
-                   "The index is out of bounds, "
-                   "please check whether the dimensions of index and "
-                   "input meet the requirements. It should "
-                   "be greater than or equal to 0, but received [%d]",
-                   scatter_i);
+    PADDLE_ENFORCE(
+        scatter_i >= 0 && scatter_i < output_count,
+        "The index is out of bounds, "
+        "please check whether the dimensions of index and "
+        "input meet the requirements. It should "
+        "be less than [%d] and greater or equal to 0, but received [%d]",
+        output_count,
+        scatter_i);
 
     int64_t out_i = scatter_i * slice_size + slice_i;
     if (overwrite) {
@@ -143,8 +149,7 @@ void GPUScatterAssign(const phi::GPUContext& ctx,
   int64_t index_size = index.dims().size() == 0 ? 1 : index.dims()[0];
 
   auto src_dims = src.dims();
-  phi::DDim output_dims(src_dims);
-  output_dims[0] = index_size;
+  phi::DDim output_dims = output->dims();
 
   // slice size
   size_t slice_size = 1;
@@ -169,11 +174,16 @@ void GPUScatterAssign(const phi::GPUContext& ctx,
   // if not overwrite mode, init data
   if (!overwrite) {
     ScatterInitCUDAKernel<T, IndexT><<<grid, block, 0, ctx.stream()>>>(
-        p_index, p_output, index_size, slice_size);
+        p_index, p_output, output_dims[0], index_size, slice_size);
   }
 
-  ScatterCUDAKernel<T, IndexT><<<grid, block, 0, ctx.stream()>>>(
-      p_src, p_index, p_output, index_size, slice_size, overwrite);
+  ScatterCUDAKernel<T, IndexT><<<grid, block, 0, ctx.stream()>>>(p_src,
+                                                                 p_index,
+                                                                 p_output,
+                                                                 output_dims[0],
+                                                                 index_size,
+                                                                 slice_size,
+                                                                 overwrite);
 }
 
 // The function is only for scatter grad x,
@@ -203,7 +213,7 @@ void GPUScatterGradForX(const phi::GPUContext& ctx,
   phi::backends::gpu::LimitGridDim(ctx, &grid);
 
   ScatterInitCUDAKernel<T, IndexT><<<grid, block, 0, ctx.stream()>>>(
-      p_index, p_output, index_size, slice_size);
+      p_index, p_output, dst_dims[0], index_size, slice_size);
 }
 
 template <typename T, typename IndexT = int>

diff --git a/paddle/phi/kernels/funcs/scatter.h b/paddle/phi/kernels/funcs/scatter.h
@@ -99,6 +99,7 @@ void ScatterAssign(const phi::CPUContext& ctx,
   auto dst_dims = output->dims();
 
   const T* p_src = src.data<T>();
+  // IndexT is int32 or int64, so direct compare is allowed.
   const IndexT* p_index = index.data<IndexT>();
   T* p_output = output->data<T>();
 
@@ -140,6 +141,17 @@ void ScatterAssign(const phi::CPUContext& ctx,
                           "be greater than or equal to 0, but received [%d]",
                           index_));
 
+    PADDLE_ENFORCE_LT(
+        index_,
+        dst_dims[0],
+        phi::errors::OutOfRange(
+            "The index is out of bounds, "
+            "please check whether the values of index and "
+            "dimensions of input meet the requirements. each index should "
+            "be less than 1st-dim size (%d) of input, but received [%d]",
+            dst_dims[0],
+            index_));
+
     memcpy(p_output + index_ * slice_size, p_src + i * slice_size, slice_bytes);
   }
 }

diff --git a/python/paddle/fluid/tests/unittests/test_scatter_op.py b/python/paddle/fluid/tests/unittests/test_scatter_op.py
@@ -376,6 +376,32 @@ def executed_api(self):
         self.scatter = paddle.scatter_
 
 
+@unittest.skipIf(core.is_compiled_with_cuda(), "CUDA will not throw exception")
+class TestScatterError(unittest.TestCase):
+    def test_scatter_index(self):
+        paddle.disable_static()
+        x = paddle.to_tensor([[1, 1], [2, 2], [3, 3]], dtype='float32')
+
+        def test_neg_index():
+            index = paddle.to_tensor([2, 1, -1, 1], dtype='int64')
+            updates = paddle.to_tensor(
+                [[1, 1], [2, 2], [3, 3], [4, 4]], dtype='float32'
+            )
+            out = paddle.scatter(x, index, updates)
+
+        self.assertRaises(IndexError, test_neg_index)
+
+        def test_too_big_index():
+            index = paddle.to_tensor([2, 1, 5, 1], dtype='int64')
+            updates = paddle.to_tensor(
+                [[1, 1], [2, 2], [3, 3], [4, 4]], dtype='float32'
+            )
+            out = paddle.scatter(x, index, updates)
+
+        self.assertRaises(IndexError, test_too_big_index)
+        paddle.enable_static()
+
+
 if __name__ == "__main__":
     paddle.enable_static()
     unittest.main()