From 2415bff2fab4b324fb8550e62f3027e9f7d2e4bc Mon Sep 17 00:00:00 2001 From: Michael Aziz Date: Tue, 7 Nov 2023 13:07:39 -0800 Subject: [PATCH] Add comments to explain the test Signed-off-by: Michael Aziz --- .../auto_local_range.cpp | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/extension/oneapi_auto_local_range/auto_local_range.cpp b/tests/extension/oneapi_auto_local_range/auto_local_range.cpp index fa14b9e86..4c0ac9492 100644 --- a/tests/extension/oneapi_auto_local_range/auto_local_range.cpp +++ b/tests/extension/oneapi_auto_local_range/auto_local_range.cpp @@ -44,6 +44,7 @@ static void check_auto_range() { sycl::buffer output_buffer{N}; { + // Create an input sequence [1, 2, 3, 4, ..., N]. auto input = input_buffer.get_host_access(); std::iota(input.begin(), input.end(), 1); } @@ -53,14 +54,28 @@ static void check_auto_range() { sycl::accessor output{output_buffer, cgh, sycl::write_only}; sycl::range auto_range = sycl::ext::oneapi::experimental::auto_range(); + // Launch a kernel with a global range of N and a local range chosen by the + // SYCL extension implementation. cgh.parallel_for(sycl::nd_range{N, auto_range}, [=](auto it) { sycl::group g = it.get_group(); int local_accumulator = 0; + + // Each work item computes the sum of a subset of the input values and + // stores the result in local_accumulator. The calls to + // get_local_linear_id() and get_local_linear_range() ensure that the set + // of input values is divided between the work items in a group + // regardless of the group size chosen by the auto_range implementation. for (size_t i = it.get_local_linear_id(); i < N.size(); i += g.get_local_linear_range()) { + // The unlinearize function maps each value of i to a unique value in + // the input. It's needed since multi-dimensional accessors cannot be + // indexed using a scalar. int value = input[unlinearize(N, i)]; local_accumulator += value; } + + // The total sum of the input values is computed using a reduce operation + // with the partial sum from each work item in a group. int total = sycl::reduce_over_group(g, local_accumulator, sycl::plus<>()); output[it.get_global_id()] = total; @@ -68,6 +83,8 @@ static void check_auto_range() { }).wait(); { + // Compare the output values to the expected sum computed using the formula + // for triangular numbers. const int expected_sum = (N.size() * (N.size() + 1)) / 2; auto output = output_buffer.get_host_access(); for (const auto& it : output) {