Skip to content

Commit

Permalink
intial commit for raja launch
Browse files Browse the repository at this point in the history
  • Loading branch information
artv3 committed Dec 27, 2024
1 parent 1d57b1a commit 17f3d15
Show file tree
Hide file tree
Showing 3 changed files with 238 additions and 8 deletions.
3 changes: 3 additions & 0 deletions examples/plugin/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ if(RAJA_ENABLE_CALIPER)
raja_add_executable(
NAME raja-caliper
SOURCES raja-caliper.cpp caliper-plugin.cpp)
raja_add_executable(
NAME raja-launch-caliper
SOURCES raja-launch-caliper.cpp caliper-plugin.cpp)
endif()

raja_add_executable(
Expand Down
216 changes: 216 additions & 0 deletions examples/plugin/raja-launch-caliper.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,216 @@
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
// Copyright (c) 2016-24, Lawrence Livermore National Security, LLC
// and RAJA project contributors. See the RAJA/LICENSE file for details.
//
// SPDX-License-Identifier: (BSD-3-Clause)
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//

#include <cstdlib>
#include <cstring>
#include <iostream>
#include <caliper/cali.h>

#include "RAJA/RAJA.hpp"
#include "RAJA/util/Timer.hpp"

/*
* RAJA Caliper integration with launch
*/

using launch_policy = RAJA::LaunchPolicy<RAJA::seq_launch_t>;
using loop_policy = RAJA::LoopPolicy<RAJA::seq_exec>;

//
// Functions for checking and printing results
//
void checkResult(double* v1, double* v2, int len);
void printResult(double* v, int len);

int main(int RAJA_UNUSED_ARG(argc), char **RAJA_UNUSED_ARG(argv[]))
{
std::cout << "\n\nRAJA daxpy example...\n";

//
auto timer = RAJA::Timer();


//
// Define vector length
//
const int N = 1000000;

//
// Allocate and initialize vector data.
//
double* a0 = new double[N];
double* aref = new double[N];

double* ta = new double[N];
double* tb = new double[N];

double c = 3.14159;

for (int i = 0; i < N; i++) {
a0[i] = 1.0;
tb[i] = 2.0;
}

//
// Declare and set pointers to array data.
// We reset them for each daxpy version so that
// they all look the same.
//

double* a = ta;
double* b = tb;


//----------------------------------------------------------------------------//

std::cout << "\n Running C-version of daxpy...\n";

std::memcpy( a, a0, N * sizeof(double) );
{
timer.start();
CALI_CXX_MARK_SCOPE("CALI: C-version elapsed time");
for (int i = 0; i < N; ++i) {
a[i] += b[i] * c;
}
timer.stop();
RAJA::Timer::ElapsedType etime = timer.elapsed();
std::cout << "C-version elapsed time : " << etime << " seconds" << std::endl;
}

std::memcpy( aref, a, N* sizeof(double) );

//----------------------------------------------------------------------------//


//----------------------------------------------------------------------------//

std::cout << "\n Running launch sequential daxpy...\n";

std::memcpy( a, a0, N * sizeof(double) );
{
timer.reset();
timer.start();
RAJA::launch<launch_policy>
(RAJA::LaunchParams(RAJA::Teams(), RAJA::Threads()),
RAJA::expt::KernelName("CALI: launch kernel"),
[=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) {

RAJA::loop<loop_policy>(ctx, RAJA::RangeSegment(0, N), [&] (int i)
{
a[i] += b[i] * c;
});

});
timer.stop();
RAJA::Timer::ElapsedType etime = timer.elapsed();
std::cout << "C-version elapsed time : " << etime << " seconds" << std::endl;
}
checkResult(a, aref, N);
//printResult(a, N);

//----------------------------------------------------------------------------//

#if defined(RAJA_ENABLE_CUDA)
//
// RAJA CUDA parallel GPU version (256 threads per thread block).
//
std::cout << "\n Running RAJA CUDA daxpy...\n";

a = 0; b = 0;
cudaErrchk(cudaMalloc( (void**)&a, N * sizeof(double) ));
cudaErrchk(cudaMalloc( (void**)&b, N * sizeof(double) ));

cudaErrchk(cudaMemcpy( a, a0, N * sizeof(double), cudaMemcpyHostToDevice ));
cudaErrchk(cudaMemcpy( b, tb, N * sizeof(double), cudaMemcpyHostToDevice ));

//RAJA::forall<RAJA::cuda_exec<256>>(RAJA::RangeSegment(0, N),
//[=] RAJA_DEVICE (int i) {
//a[i] += b[i] * c;
//});

cudaErrchk(cudaMemcpy( ta, a, N * sizeof(double), cudaMemcpyDeviceToHost ));

cudaErrchk(cudaFree(a));
cudaErrchk(cudaFree(b));

a = ta;
checkResult(a, aref, N);
//printResult(a, N);
#endif

//----------------------------------------------------------------------------//

#if defined(RAJA_ENABLE_HIP)
//
// RAJA HIP parallel GPU version (256 threads per thread block).
//
std::cout << "\n Running RAJA HIP daxpy...\n";

a = 0; b = 0;
hipErrchk(hipMalloc( (void**)&a, N * sizeof(double) ));
hipErrchk(hipMalloc( (void**)&b, N * sizeof(double) ));

hipErrchk(hipMemcpy( a, a0, N * sizeof(double), hipMemcpyHostToDevice ));
hipErrchk(hipMemcpy( b, tb, N * sizeof(double), hipMemcpyHostToDevice ));

//RAJA::forall<RAJA::hip_exec<256>>(RAJA::RangeSegment(0, N),
//[=] RAJA_DEVICE (int i) {
//a[i] += b[i] * c;
//});

hipErrchk(hipMemcpy( ta, a, N * sizeof(double), hipMemcpyDeviceToHost ));

hipErrchk(hipFree(a));
hipErrchk(hipFree(b));

a = ta;
checkResult(a, aref, N);
//printResult(a, N);
#endif

//----------------------------------------------------------------------------//

//
// Clean up.
//
delete[] a0;
delete[] aref;
delete[] ta;
delete[] tb;

std::cout << "\n DONE!...\n";

return 0;
}

//
// Function to compare result to reference and report P/F.
//
void checkResult(double* v1, double* v2, int len)
{
bool match = true;
for (int i = 0; i < len; i++) {
if ( v1[i] != v2[i] ) { match = false; }
}
if ( match ) {
std::cout << "\n\t result -- PASS\n";
} else {
std::cout << "\n\t result -- FAIL\n";
}
}

//
// Function to print result.
//
void printResult(double* v, int len)
{
std::cout << std::endl;
for (int i = 0; i < len; i++) {
std::cout << "result[" << i << "] = " << v[i] << std::endl;
}
std::cout << std::endl;
}
27 changes: 19 additions & 8 deletions include/RAJA/pattern/launch/launch_core.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -226,11 +226,14 @@ void launch(LaunchParams const &launch_params, const char *kernel_name, ReducePa
//Get reducers
auto reducers = expt::make_forall_param_pack(std::forward<ReduceParams>(rest_of_launch_args)...);

//kernel name
std::string kname;

auto&& launch_body = expt::get_lambda(std::forward<ReduceParams>(rest_of_launch_args)...);

//Take the first policy as we assume the second policy is not user defined.
//We rely on the user to pair launch and loop policies correctly.
util::PluginContext context{util::make_context<typename LAUNCH_POLICY::host_policy_t>()};
util::PluginContext context{util::make_context<typename LAUNCH_POLICY::host_policy_t>(&kname)};
util::callPreCapturePlugins(context);

using RAJA::util::trigger_updates_before;
Expand Down Expand Up @@ -260,11 +263,14 @@ void launch(LaunchParams const &launch_params, ReduceParams&&... rest_of_launch_
//Get reducers
auto reducers = expt::make_forall_param_pack(std::forward<ReduceParams>(rest_of_launch_args)...);

//get kernel name
std::string kname;

auto&& launch_body = expt::get_lambda(std::forward<ReduceParams>(rest_of_launch_args)...);

//Take the first policy as we assume the second policy is not user defined.
//We rely on the user to pair launch and loop policies correctly.
util::PluginContext context{util::make_context<typename LAUNCH_POLICY::host_policy_t>()};
util::PluginContext context{util::make_context<typename LAUNCH_POLICY::host_policy_t>(&kname)};
util::callPreCapturePlugins(context);

using RAJA::util::trigger_updates_before;
Expand Down Expand Up @@ -409,15 +415,18 @@ launch(RAJA::resources::Resource res, LaunchParams const &launch_params,
place = RAJA::ExecPlace::DEVICE;
}

//Get kernel name
std::string kname;

//
//Configure plugins
//
#if defined(RAJA_GPU_ACTIVE)
util::PluginContext context{place == ExecPlace::HOST ?
util::make_context<typename POLICY_LIST::host_policy_t>() :
util::make_context<typename POLICY_LIST::device_policy_t>()};
util::make_context<typename POLICY_LIST::host_policy_t>(&kname) :
util::make_context<typename POLICY_LIST::device_policy_t>(&kname)};
#else
util::PluginContext context{util::make_context<typename POLICY_LIST::host_policy_t>()};
util::PluginContext context{util::make_context<typename POLICY_LIST::host_policy_t>(&kname)};
#endif

util::callPreCapturePlugins(context);
Expand Down Expand Up @@ -468,6 +477,8 @@ launch(RAJA::resources::Resource res, LaunchParams const &launch_params,
//Get reducers
auto reducers = expt::make_forall_param_pack(std::forward<ReduceParams>(rest_of_launch_args)...);

std::string kname;

auto&& launch_body = expt::get_lambda(std::forward<ReduceParams>(rest_of_launch_args)...);

ExecPlace place;
Expand All @@ -482,10 +493,10 @@ launch(RAJA::resources::Resource res, LaunchParams const &launch_params,
//
#if defined(RAJA_GPU_ACTIVE)
util::PluginContext context{place == ExecPlace::HOST ?
util::make_context<typename POLICY_LIST::host_policy_t>() :
util::make_context<typename POLICY_LIST::device_policy_t>()};
util::make_context<typename POLICY_LIST::host_policy_t>(&kname) :
util::make_context<typename POLICY_LIST::device_policy_t>(&kname)};
#else
util::PluginContext context{util::make_context<typename POLICY_LIST::host_policy_t>()};
util::PluginContext context{util::make_context<typename POLICY_LIST::host_policy_t>(&kname)};
#endif

util::callPreCapturePlugins(context);
Expand Down

0 comments on commit 17f3d15

Please sign in to comment.