diff --git a/.gitignore b/.gitignore
index 38b4644f..b8eea46d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,6 +12,8 @@ DartConfiguration.tcl
Testing
build/
+.build/
+.cache/
*.swp
*.png
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4d7106ba..2e2db929 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -7,6 +7,7 @@ project(redGrapesExamplesAndTests VERSION 0.1.0)
# Examples & Tests
########################################################
option(redGrapes_BUILD_EXAMPLES "Build the examples" ON)
+option(BUILD_TESTING "Build the tests" OFF)
if(redGrapes_BUILD_EXAMPLES)
add_subdirectory("examples/")
diff --git a/README.md b/README.md
index 166556ae..49974b3b 100644
--- a/README.md
+++ b/README.md
@@ -2,13 +2,13 @@
**Re**source-based, **D**eclarative task-**Gra**phs for **P**arallel, **E**vent-driven **S**cheduling
[](https://GitHub.com/ComputationalRadiationPhysics/redGrapes/commit/)
-[](https://isocpp.org/)
+[](https://isocpp.org/)
[](https://www.mozilla.org/en-US/MPL/2.0/)
[](https://redgrapes.readthedocs.io/en/dev/?badge=dev)
-RedGrapes is a C++17 framework for declaratively creating and scheduling task-graphs, based on a high-level resource description.
+RedGrapes is a C++20 framework for declaratively creating and scheduling task-graphs, based on a high-level resource description.
### Motivation
@@ -103,7 +103,7 @@ However since we want to achieve **declarative task dependencies**, for which th
**compile time checked memory access**: The automatic creation of a task graph is often done via annotations, e.g., a pragma in OpenMP, but that does not guarantee the correctness of the access specifications. RedGrapes leverages the type system to write relatively safe code in that regard.
-**native C++**: PaRSEC has a complicated toolchain using additional compilers, OpenMP makes use of pragmas that require compiler support. RedGrapes only requires the C++14 standard.
+**native C++**: PaRSEC has a complicated toolchain using additional compilers, OpenMP makes use of pragmas that require compiler support. RedGrapes only requires the C++20 standard.
**typesafe**: Some libraries like Legion or StarPU use an untyped ``argc``/``argv`` interface to pass parameters to tasks, which is error-prone. Both libraries in general also require a lot of C-style boilerplate.
@@ -154,7 +154,7 @@ Its conceptual design is based on a [whitepaper by A. Huebl, R. Widera, and A. M
### Dependencies
-RedGrapes requires a compiler supporting the C++17 standard.
+RedGrapes requires a compiler supporting the C++20 standard.
RedGrapes further depends on the following libraries:
* [ConcurrentQueue](https://github.com/cameron314/concurrentqueue) by [Cameron Desrochers](https://moodycamel.com)
diff --git a/docs/source/install.rst b/docs/source/install.rst
index eaa225ae..bf5d8407 100644
--- a/docs/source/install.rst
+++ b/docs/source/install.rst
@@ -26,12 +26,12 @@ In order to build the examples and tests, do the typical cmake procedure:
Enable Tests with
::
- cmake .. BUILD_TESTING=ON
+ cmake .. -DBUILD_TESTING=ON
Set Loglevel
::
- cmake .. CMAKE_CXX_FLAGS="-DSPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_OFF"
+ cmake .. -DCMAKE_CXX_FLAGS="-DSPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_OFF"
Enable Tracing with Perfetto
::
- cmake .. redGrapes_ENABLE_PERFETTO=ON
+ cmake .. -DredGrapes_ENABLE_PERFETTO=ON
diff --git a/examples/1_resources.cpp b/examples/1_resources.cpp
index f4980c15..b3303a29 100644
--- a/examples/1_resources.cpp
+++ b/examples/1_resources.cpp
@@ -1,4 +1,4 @@
-/* Copyright 2019 Michael Sippel
+/* Copyright 2019-2024 Michael Sippel, Tapish Narwal
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
@@ -6,31 +6,31 @@
*/
#include
-#include
-#include
-#include
+
+#include
int main(int, char*[])
{
- redGrapes::init(1);
- redGrapes::FieldResource> a;
- redGrapes::IOResource b;
- redGrapes::IOResource c;
+ auto rg = redGrapes::init(1);
+ using TTask = decltype(rg)::RGTask;
+
+ auto a = rg.createFieldResource>();
+ auto b = rg.createIOResource();
+ auto c = rg.createIOResource();
- redGrapes::ResourceUser user1(
+ redGrapes::ResourceUser user1(
{a.read(), // complete resource
a.write().area({0}, {10}), // write only indices 0 to 10
b.write()});
- redGrapes::ResourceUser user2({b.read()});
+ redGrapes::ResourceUser user2({b.read()});
- redGrapes::ResourceUser user3({b.read(), c.write()});
+ redGrapes::ResourceUser user3({b.read(), c.write()});
- std::cout << "is_serial(user1,user1) = " << redGrapes::ResourceUser::is_serial(user1, user1) << std::endl;
- std::cout << "is_serial(user1,user2) = " << redGrapes::ResourceUser::is_serial(user1, user2) << std::endl;
- std::cout << "is_serial(user1,user3) = " << redGrapes::ResourceUser::is_serial(user1, user3) << std::endl;
- std::cout << "is_serial(user2,user3) = " << redGrapes::ResourceUser::is_serial(user2, user3) << std::endl;
+ std::cout << "is_serial(user1,user1) = " << is_serial(user1, user1) << std::endl;
+ std::cout << "is_serial(user1,user2) = " << is_serial(user1, user2) << std::endl;
+ std::cout << "is_serial(user1,user3) = " << is_serial(user1, user3) << std::endl;
+ std::cout << "is_serial(user2,user3) = " << is_serial(user2, user3) << std::endl;
- redGrapes::finalize();
return 0;
}
diff --git a/examples/2_functors.cpp b/examples/2_functors.cpp
index b5e9b0e4..2d2c1237 100644
--- a/examples/2_functors.cpp
+++ b/examples/2_functors.cpp
@@ -1,4 +1,4 @@
-/* Copyright 2019 Michael Sippel
+/* Copyright 2019-2024 Michael Sippel, Tapish Narwal
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
@@ -6,10 +6,6 @@
*/
#include
-#include
-#include
-
-#include
int square(int x)
{
@@ -19,10 +15,9 @@ int square(int x)
int main()
{
spdlog::set_level(spdlog::level::trace);
- redGrapes::init(1);
+ auto rg = redGrapes::init(1);
- fmt::print("square(2) = {}\n", redGrapes::emplace_task(square, 2).get());
+ fmt::print("square(2) = {}\n", rg.emplace_task(square, 2).get());
- redGrapes::finalize();
return 0;
}
diff --git a/examples/3_functors_with_resources.cpp b/examples/3_functors_with_resources.cpp
index 20364fa4..8fc35cb1 100644
--- a/examples/3_functors_with_resources.cpp
+++ b/examples/3_functors_with_resources.cpp
@@ -1,4 +1,4 @@
-/* Copyright 2019 Michael Sippel
+/* Copyright 2019-2024 Michael Sippel, Tapish Narwal
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
@@ -15,13 +15,14 @@
int main(void)
{
spdlog::set_level(spdlog::level::trace);
- redGrapes::init();
+ auto rg = redGrapes::init();
- redGrapes::IOResource a, b;
+ auto a = rg.createIOResource();
+ auto b = rg.createIOResource();
for(int i = 0; i < 1; ++i)
{
- redGrapes::emplace_task(
+ rg.emplace_task(
[](auto a)
{
std::cout << "Write to A" << std::endl;
@@ -31,7 +32,7 @@ int main(void)
},
a.write());
- redGrapes::emplace_task(
+ rg.emplace_task(
[](auto a)
{
std::cout << "Read A: " << *a << std::endl;
@@ -39,17 +40,17 @@ int main(void)
},
a.read());
- redGrapes::emplace_task(
+ rg.emplace_task(
[](auto b)
{
std::cout << "Write to B" << std::endl;
- std::this_thread::sleep_for(std::chrono::seconds(2));
+ std::this_thread::sleep_for(std::chrono::seconds(3));
*b = 7;
std::cout << "Write B done" << std::endl;
},
b.write());
- redGrapes::emplace_task(
+ rg.emplace_task(
[](auto a, auto b)
{
std::cout << "Read A & B: " << *a << ", " << *b << std::endl;
@@ -59,7 +60,5 @@ int main(void)
b.read());
}
- redGrapes::finalize();
-
return 0;
}
diff --git a/examples/4_refinements.cpp b/examples/4_refinements.cpp
index e30bf56e..213ecc48 100644
--- a/examples/4_refinements.cpp
+++ b/examples/4_refinements.cpp
@@ -1,4 +1,4 @@
-/* Copyright 2019 Michael Sippel
+/* Copyright 2019-2024 Michael Sippel, Tapish Narwal
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
@@ -6,6 +6,7 @@
*/
#include
+#include
#include
#include
@@ -16,43 +17,45 @@ int main(int, char*[])
spdlog::set_level(spdlog::level::trace);
spdlog::set_pattern("[thread %t] %^[%l]%$ %v");
- redGrapes::init(4);
-
- redGrapes::emplace_task(
- []
- {
- std::cout << "f1"
- << "..." << std::endl;
-
- int i = 0;
- for(auto t : redGrapes::backtrace())
- fmt::print("refinement 1 backtrace [{}]: {}\n", i++, t.get().label);
-
- redGrapes::emplace_task(
- []
- {
- fmt::print("Refinement 1\n");
- std::this_thread::sleep_for(std::chrono::seconds(1));
- });
-
- SPDLOG_TRACE("EX: create next task task");
-
- redGrapes::emplace_task(
- []
- {
- fmt::print("Refinement 2\n");
- std::this_thread::sleep_for(std::chrono::seconds(1));
-
- int i = 0;
- for(auto t : redGrapes::backtrace())
- fmt::print("refinement 2 backtrace [{}]: {}\n", i++, (redGrapes::TaskProperties const&) t);
- })
- .label("Child Task 2");
- })
+ auto rg = redGrapes::init(4);
+
+ rg.emplace_task(
+ [&rg]
+ {
+ std::cout << "f1"
+ << "..." << std::endl;
+
+ int i = 0;
+ for(auto t : rg.backtrace())
+ fmt::print("refinement 1 backtrace [{}]: {}\n", i++, t.get().label);
+
+ rg.emplace_task(
+ []
+ {
+ fmt::print("Refinement 1\n");
+ std::this_thread::sleep_for(std::chrono::seconds(1));
+ });
+
+ SPDLOG_TRACE("EX: create next task task");
+
+ rg.emplace_task(
+ [&rg]
+ {
+ fmt::print("Refinement 2\n");
+ std::this_thread::sleep_for(std::chrono::seconds(1));
+
+ int i = 0;
+ for(auto t : rg.backtrace())
+ fmt::print(
+ "refinement 2 backtrace [{}]: {}\n",
+ i++,
+ (decltype(rg)::RGTask::TaskProperties const&) t); // TODO cleaner way to do this
+ })
+ .label("Child Task 2");
+ })
.label("Parent Task")
.submit();
- redGrapes::finalize();
return 0;
}
diff --git a/examples/5_access_demotion.cpp b/examples/5_access_demotion.cpp
index 03e3e7c3..7bd5500a 100644
--- a/examples/5_access_demotion.cpp
+++ b/examples/5_access_demotion.cpp
@@ -1,4 +1,4 @@
-/* Copyright 2019 Michael Sippel
+/* Copyright 2019-2024 Michael Sippel, Tapish Narwal
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
@@ -7,7 +7,6 @@
#include
#include
-#include
#include
#include
@@ -18,33 +17,32 @@ namespace rg = redGrapes;
int main(int, char*[])
{
spdlog::set_level(spdlog::level::trace);
- rg::init();
- rg::IOResource a;
+ auto rg = rg::init();
+ auto a = rg.createIOResource();
- rg::emplace_task(
- [](auto a)
+ rg.emplace_task(
+ [&](auto a)
{
std::cout << "f1 writes A" << std::endl;
std::this_thread::sleep_for(std::chrono::seconds(1));
std::cout << "f1 now only reads A" << std::endl;
- rg::update_properties(
- rg::TaskProperties::Patch::Builder().remove_resources({a.write()}).add_resources({a.read()}));
+ rg.update_properties(decltype(rg)::RGTask::TaskProperties::Patch::Builder()
+ .remove_resources({a.write()})
+ .add_resources({a.read()}));
std::this_thread::sleep_for(std::chrono::seconds(1));
std::cout << "f1 done" << std::endl;
},
a.write());
- rg::emplace_task(
- [](auto a)
+ rg.emplace_task(
+ []([[maybe_unused]] auto a)
{
std::cout << "f2 reads A" << std::endl;
std::cout << "f2 done" << std::endl;
},
a.read());
- rg::finalize();
-
return 0;
}
diff --git a/examples/6_resource_scope.cpp b/examples/6_resource_scope.cpp
index 5d267117..283c9737 100644
--- a/examples/6_resource_scope.cpp
+++ b/examples/6_resource_scope.cpp
@@ -1,4 +1,4 @@
-/* Copyright 2019 Michael Sippel
+/* Copyright 2019-2024 Michael Sippel, Tapish Narwal
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
@@ -7,35 +7,33 @@
#include
#include
-#include
-#include
+
+#include
namespace rg = redGrapes;
int main()
{
- rg::init(1);
- rg::IOResource a; // scope-level=0
+ auto rg = rg::init(1);
+ auto a = rg.createIOResource(); // scope-level=0
- rg::emplace_task(
- [](auto a)
- {
- std::cout << "scope = " << rg::scope_depth() << std::endl;
- rg::IOResource b; // scope-level=1
+ rg.emplace_task(
+ [&]([[maybe_unused]] auto a)
+ {
+ std::cout << "scope = " << rg.scope_depth() << std::endl;
+ auto b = rg.createIOResource(); // scope-level=1
- rg::emplace_task(
- [](auto b)
- {
- *b = 1;
- std::cout << "scope = " << rg::scope_depth() << std::endl;
- },
- b.write())
- .get();
+ rg.emplace_task(
+ [&](auto b)
+ {
+ *b = 1;
+ std::cout << "scope = " << rg.scope_depth() << std::endl;
+ },
+ b.write())
+ .get();
- std::cout << "scope = " << rg::scope_depth() << std::endl;
- },
- a.read())
+ std::cout << "scope = " << rg.scope_depth() << std::endl;
+ },
+ a.read())
.enable_stack_switching();
-
- rg::finalize();
}
diff --git a/examples/7_event.cpp b/examples/7_event.cpp
index 96f12203..3012a146 100644
--- a/examples/7_event.cpp
+++ b/examples/7_event.cpp
@@ -1,4 +1,4 @@
-/* Copyright 2019 Michael Sippel
+/* Copyright 2019-2024 Michael Sippel, Tapish Narwal
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
@@ -8,9 +8,6 @@
#define SPDLOG_ACTIVE_LEVEL SPDLOG_LEVEL_OFF
#include
-#include
-#include
-#include
#include
#include
@@ -21,20 +18,20 @@ int main()
spdlog::set_level(spdlog::level::trace);
spdlog::set_pattern("[thread %t] %^[%l]%$ %v");
- redGrapes::init(1);
+ auto rg = redGrapes::init(1);
- redGrapes::Resource r1;
+ auto r1 = rg.createResource();
- auto event_f = redGrapes::emplace_task(
- []
- {
- std::cout << "Task 1" << std::endl;
- return redGrapes::create_event();
- })
+ auto event_f = rg.emplace_task(
+ [&]
+ {
+ std::cout << "Task 1" << std::endl;
+ return rg.create_event();
+ })
.resources({r1.make_access(redGrapes::access::IOAccess::write)})
.submit();
- redGrapes::emplace_task([] { std::cout << "Task 2" << std::endl; })
+ rg.emplace_task([] { std::cout << "Task 2" << std::endl; })
.resources({r1.make_access(redGrapes::access::IOAccess::write)});
auto event = event_f.get();
@@ -45,7 +42,6 @@ int main()
std::cout << "notify event" << std::endl;
event->notify();
- redGrapes::finalize();
return 0;
}
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
index bdae5941..b2ddd839 100644
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -3,8 +3,6 @@ cmake_minimum_required(VERSION 3.18.0)
project(redGrapesExamples LANGUAGES CXX)
-set(redGrapes_CONFIG_INCLUDE_DIR "${CMAKE_CURRENT_LIST_DIR}/config")
-
find_package(redGrapes REQUIRED CONFIG PATHS "${CMAKE_CURRENT_LIST_DIR}/../")
include_directories(SYSTEM ${redGrapes_INCLUDE_DIRS})
@@ -57,7 +55,7 @@ if(CMAKE_CUDA_COMPILER)
enable_language(CUDA)
add_executable(cuda_mandelbrot cuda_mandelbrot.cu)
- target_compile_features(cuda_mandelbrot PUBLIC cuda_std_14)
+ target_compile_features(cuda_mandelbrot PUBLIC cuda_std_${redGrapes_CXX_STANDARD}) # cuda_std_*, not cxx_std_*: the only source of this target is a .cu file
set_target_properties(cuda_mandelbrot PROPERTIES
CUDA_EXTENSIONS OFF
CUDA_STANDARD_REQUIRED ON
diff --git a/examples/cholesky.cpp b/examples/cholesky.cpp
index 65853130..0b20402f 100644
--- a/examples/cholesky.cpp
+++ b/examples/cholesky.cpp
@@ -1,27 +1,37 @@
-#include
-
-#include
-// work-around, see
-// https://github.com/xianyi/OpenBLAS/issues/1992#issuecomment-459474791
-// https://github.com/xianyi/OpenBLAS/pull/1998
-#include
-#define lapack_complex_float std::complex
-#define lapack_complex_double std::complex
-// end work-around
-
-#include
-#include
-
-#include
-
-#define REDGRAPES_TASK_PROPERTIES redGrapes::LabelProperty
+/* Copyright 2019-2024 Michael Sippel, Tapish Narwal
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
#include
#include
-namespace rg = redGrapes;
+#include
+#include
+
+#include
-void print_matrix(std::vector> A, int n_blocks, int blocksize);
+template
+void print_matrix(std::vector> A, int nblks, int blocksize)
+{
+ for(int ia = 0; ia < nblks; ++ia)
+ {
+ for(int ib = 0; ib < blocksize; ++ib)
+ {
+ for(int ja = 0; ja < nblks; ++ja)
+ {
+ for(int jb = 0; jb < blocksize; ++jb)
+ {
+ std::cout << (*A[ja * nblks + ia])[jb * blocksize + ib] << "; ";
+ }
+ }
+ std::cout << std::endl;
+ }
+ }
+ std::cout << std::endl;
+}
int main(int argc, char* argv[])
{
@@ -44,7 +54,8 @@ int main(int argc, char* argv[])
if(argc >= 4)
n_threads = atoi(argv[3]);
- rg::init(n_threads);
+ auto rg = redGrapes::init(n_threads);
+ using RGTask = decltype(rg)::RGTask;
size_t N = nblks * blksz;
@@ -59,9 +70,8 @@ int main(int argc, char* argv[])
for(size_t i = 0; i < N; i++)
Alin[i * N + i] += N;
-
// initialize tiled matrix in column-major layout
- std::vector> A(nblks * nblks);
+ std::vector> A(nblks * nblks);
// allocate each tile (also in column-major layout)
for(size_t j = 0; j < nblks; ++j)
@@ -88,10 +98,10 @@ int main(int argc, char* argv[])
for(size_t i = j + 1; i < nblks; i++)
{
// A[i,j] = A[i,j] - A[i,k] * (A[j,k])^t
- rg::emplace_task(
+ rg.emplace_task(
[blksz](auto a, auto b, auto c)
{
- spdlog::info("dgemm");
+ spdlog::debug("dgemm");
cblas_dgemm(
CblasColMajor,
CblasNoTrans,
@@ -117,10 +127,10 @@ int main(int argc, char* argv[])
for(size_t i = 0; i < j; i++)
{
// A[j,j] = A[j,j] - A[j,i] * (A[j,i])^t
- rg::emplace_task(
- [blksz, nblks](auto a, auto c)
+ rg.emplace_task(
+ [blksz](auto a, auto c)
{
- spdlog::info("dsyrk");
+ spdlog::debug("dsyrk");
cblas_dsyrk(
CblasColMajor,
CblasLower,
@@ -139,10 +149,10 @@ int main(int argc, char* argv[])
}
// Cholesky Factorization of A[j,j]
- rg::emplace_task(
- [j, blksz, nblks](auto a)
+ rg.emplace_task(
+ [blksz](auto a)
{
- spdlog::info("dpotrf");
+ spdlog::debug("dpotrf");
LAPACKE_dpotrf(LAPACK_COL_MAJOR, 'L', blksz, *a, blksz);
},
A[j * nblks + j].write());
@@ -150,10 +160,10 @@ int main(int argc, char* argv[])
for(size_t i = j + 1; i < nblks; i++)
{
// A[i,j] <- A[i,j] = X * (A[j,j])^t
- rg::emplace_task(
- [blksz, nblks](auto a, auto b)
+ rg.emplace_task(
+ [blksz](auto a, auto b)
{
- spdlog::info("dtrsm");
+ spdlog::debug("dtrsm");
cblas_dtrsm(
CblasColMajor,
CblasRight,
@@ -173,28 +183,7 @@ int main(int argc, char* argv[])
}
}
- rg::finalize();
-
print_matrix(A, nblks, blksz);
return 0;
}
-
-void print_matrix(std::vector> A, int nblks, int blocksize)
-{
- for(int ia = 0; ia < nblks; ++ia)
- {
- for(int ib = 0; ib < blocksize; ++ib)
- {
- for(int ja = 0; ja < nblks; ++ja)
- {
- for(int jb = 0; jb < blocksize; ++jb)
- {
- std::cout << (*A[ja * nblks + ia])[jb * blocksize + ib] << "; ";
- }
- }
- std::cout << std::endl;
- }
- }
- std::cout << std::endl;
-}
diff --git a/examples/config/redGrapes_config.hpp b/examples/config/redGrapes_config.hpp
deleted file mode 100644
index 3aff6572..00000000
--- a/examples/config/redGrapes_config.hpp
+++ /dev/null
@@ -1,39 +0,0 @@
-#pragma once
-
-
-#include
-#include
-
-enum SchedulerTags
-{
- SCHED_MPI,
- SCHED_CUDA
-};
-
-#define REDGRAPES_TASK_PROPERTIES \
- redGrapes::LabelProperty, redGrapes::scheduler::SchedulingTagProperties
-
-#define REDGRAPES_ALLOC_CHUNKSIZE (64 * 1024)
-
-template<>
-struct fmt::formatter
-{
- constexpr auto parse(format_parse_context& ctx)
- {
- return ctx.begin();
- }
-
- template
- auto format(SchedulerTags const& tag, FormatContext& ctx)
- {
- switch(tag)
- {
- case SCHED_MPI:
- return fmt::format_to(ctx.out(), "\"MPI\"");
- case SCHED_CUDA:
- return fmt::format_to(ctx.out(), "\"CUDA\"");
- default:
- return fmt::format_to(ctx.out(), "\"undefined\"");
- }
- }
-};
diff --git a/examples/cuda_mandelbrot.cu b/examples/cuda_mandelbrot.cu
index 627b4c9e..8ae9cb68 100644
--- a/examples/cuda_mandelbrot.cu
+++ b/examples/cuda_mandelbrot.cu
@@ -1,11 +1,22 @@
-/* Copyright 2020 Michael Sippel
+/* Copyright 2020-2024 Michael Sippel, Tapish Narwal
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
#include
+#include
#include
#include
@@ -13,23 +24,6 @@
#include
#include
-enum SchedulerTag
-{
- SCHED_CUDA
-};
-
-#define REDGRAPES_TASK_PROPERTIES dispatch::cuda::CudaTaskProperties, scheduler::SchedulingTagProperties
-
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-namespace rg = redGrapes;
-
struct Color
{
float r, g, b;
@@ -70,18 +64,27 @@ __global__ void mandelbrot(
out[index] = Color{cosf(float(i) / 7.0), cosf(2.0 + float(i) / 11.0), cosf(4.0 + float(i) / 13.0)};
}
-int main()
+struct CudaTag
{
- auto default_scheduler = std::make_shared(4 /* number of CPU workers */);
+};
+
+using RGTask = redGrapes::Task;
- auto cuda_scheduler = std::make_shared(
- [](rg::Task const& t) { return t.required_scheduler_tags.test(SCHED_CUDA); },
- 4 /* number of cuda streams */
- );
+int main()
+{
+ spdlog::set_level(spdlog::level::trace);
+ spdlog::set_pattern("[thread %t] %^[%l]%$ %v");
- rg::idle = [cuda_scheduler] { cuda_scheduler->poll(); };
+ auto rg = redGrapes::init(
+ redGrapes::SchedulerDescription(
+ std::make_shared>(),
+ CudaTag{}),
+ redGrapes::SchedulerDescription(
+ std::make_shared>>(
+ 4),
+ redGrapes::DefaultTag{}));
- rg::init(rg::scheduler::make_tag_match_scheduler().add({}, default_scheduler).add({SCHED_CUDA}, cuda_scheduler));
+ auto& cudaSched = rg.getScheduler();
double mid_x = 0.41820187155955555;
double mid_y = 0.32743154895555555;
@@ -90,10 +93,10 @@ int main()
size_t height = 4096;
size_t area = width * height;
- rg::IOResource host_buffer;
- rg::IOResource device_buffer;
+ redGrapes::IOResource host_buffer;
+ redGrapes::IOResource device_buffer;
- rg::emplace_task(
+ rg.emplace_task(
[area](auto host_buffer)
{
void* ptr;
@@ -102,7 +105,7 @@ int main()
},
host_buffer.write());
- rg::emplace_task(
+ rg.emplace_task(
[area](auto device_buffer)
{
void* ptr;
@@ -124,52 +127,55 @@ int main()
/*
* calculate picture
*/
- rg::emplace_task(
- [width, height, area, i, mid_x, mid_y, w](auto device_buffer)
- {
- double begin_x = mid_x - w;
- double end_x = mid_x + w;
- double begin_y = mid_y - w;
- double end_y = mid_y + w;
-
- dim3 threadsPerBlock(8, 8);
- dim3 numBlocks(width / threadsPerBlock.x, height / threadsPerBlock.y);
-
- mandelbrot<<>>(
- begin_x,
- end_x,
- begin_y,
- end_y,
- width,
- height,
- *device_buffer);
- std::cout << "launched kernel to stream " << rg::dispatch::cuda::current_stream << std::endl;
- },
- rg::TaskProperties::Builder().scheduling_tags({SCHED_CUDA}),
- device_buffer.write());
+ rg.emplace_task(
+ [width, height, area, i, mid_x, mid_y, w, &cudaSched](auto device_buffer)
+ {
+ double begin_x = mid_x - w;
+ double end_x = mid_x + w;
+ double begin_y = mid_y - w;
+ double end_y = mid_y + w;
+
+ dim3 threadsPerBlock(8, 8);
+ dim3 numBlocks(width / threadsPerBlock.x, height / threadsPerBlock.y);
+
+ auto current_stream = cudaSched.getCudaStream(0);
+ mandelbrot<<>>(
+ begin_x,
+ end_x,
+ begin_y,
+ end_y,
+ width,
+ height,
+ *device_buffer);
+ std::cout << "launched kernel to stream " << current_stream << std::endl;
+ },
+ device_buffer.write())
+ .cuda_stream_index(0u);
/*
* copy data
*/
- rg::emplace_task(
- [area](auto host_buffer, auto device_buffer)
- {
- cudaMemcpyAsync(
- *host_buffer,
- *device_buffer,
- area * sizeof(Color),
- cudaMemcpyDeviceToHost,
- rg::dispatch::cuda::current_stream);
- std::cout << "launched memcpy to stream " << rg::dispatch::cuda::current_stream << std::endl;
- },
- rg::TaskProperties::Builder().scheduling_tags({SCHED_CUDA}),
- host_buffer.write(),
- device_buffer.read());
+ rg.emplace_task(
+ [area, &cudaSched](auto host_buffer, auto device_buffer)
+ {
+ auto current_stream = cudaSched.getCudaStream(0);
+ cudaMemcpyAsync(
+ *host_buffer,
+ *device_buffer,
+ area * sizeof(Color),
+ cudaMemcpyDeviceToHost,
+ current_stream);
+ std::cout << "launched memcpy to stream " << current_stream << std::endl;
+ },
+ host_buffer.write(),
+ device_buffer.read())
+ .cuda_stream_index(0u);
+ // NOTE(review): index 0 presumably selects the same stream that getCudaStream(0) returns inside the task - confirm
/*
* write png
*/
- rg::emplace_task(
+ rg.emplace_task(
[width, height, i](auto host_buffer)
{
std::stringstream step;
@@ -179,9 +185,9 @@ int main()
pngwriter png(width, height, 0, filename.c_str());
png.setcompressionlevel(9);
- for(int y = 0; y < height; ++y)
+ for(size_t y = 0; y < height; ++y)
{
- for(int x = 0; x < width; ++x)
+ for(size_t x = 0; x < width; ++x)
{
auto& color = (*host_buffer)[x + y * width];
png.plot(x + 1, height - y, color.r, color.g, color.b);
@@ -194,7 +200,7 @@ int main()
host_buffer.read());
}
- rg::emplace_task([](auto b) {}, host_buffer.write()).get();
+ rg.emplace_task([]([[maybe_unused]] auto b) {}, host_buffer.write()).get();
auto t2 = std::chrono::high_resolution_clock::now();
std::cout << "runtime: " << std::chrono::duration_cast(t2 - t1).count() << " μs"
@@ -203,7 +209,9 @@ int main()
/*
* cleanup
*/
- rg::emplace_task([](auto host_buffer) { cudaFreeHost(*host_buffer); }, host_buffer.write());
+ rg.emplace_task([](auto host_buffer) { cudaFreeHost(*host_buffer); }, host_buffer.write())
+ .cuda_stream_index(0u);
- rg::emplace_task([](auto device_buffer) { cudaFree(*device_buffer); }, device_buffer.write());
+ rg.emplace_task([](auto device_buffer) { cudaFree(*device_buffer); }, device_buffer.write())
+ .cuda_stream_index(0u);
}
diff --git a/examples/game_of_life.cpp b/examples/game_of_life.cpp
index 592bb0b0..5d42da76 100644
--- a/examples/game_of_life.cpp
+++ b/examples/game_of_life.cpp
@@ -1,4 +1,4 @@
-/* Copyright 2019 Michael Sippel, Sergei Bastrakov
+/* Copyright 2019-2024 Michael Sippel, Sergei Bastrakov, Tapish Narwal
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
@@ -9,17 +9,14 @@
* @file examples/game_of_life.cpp
*/
+#include "redGrapes/resource/fieldresource.hpp"
+
#include
-#include
-#include
-#include
-#include
#include
#include
#include
#include
-#include
struct Vec2
{
@@ -52,11 +49,12 @@ int main(int, char*[])
spdlog::set_level(spdlog::level::trace);
spdlog::set_pattern("[thread %t] %^[%l]%$ %v");
- redGrapes::init(4);
+ auto rg = redGrapes::init(4);
using Buffer = std::array, size.y + 2>;
- std::vector> buffers;
+ using TaskType = decltype(rg)::RGTask;
+ std::vector> buffers;
for(size_t i = 0; i < 4; ++i)
buffers.emplace_back(new Buffer());
@@ -64,7 +62,7 @@ int main(int, char*[])
int current = 0;
// initialization
- redGrapes::emplace_task(
+ rg.emplace_task(
[](auto buf)
{
std::default_random_engine generator;
@@ -81,7 +79,7 @@ int main(int, char*[])
int next = (current + 1) % buffers.size();
// copy borders
- redGrapes::emplace_task(
+ rg.emplace_task(
[](auto buf)
{
for(size_t x = 0; x < size.x + 2; ++x)
@@ -99,26 +97,26 @@ int main(int, char*[])
buffers[current].write());
// print buffer
- redGrapes::emplace_task(
- [](auto buf)
- {
- for(size_t x = 1; x < size.x; ++x)
- {
- for(size_t y = 1; y < size.y; ++y)
- {
- std::cout << ((buf[{x, y}] == ALIVE) ? "[47m" : "[100m") << " ";
- }
- std::cout << "[0m" << std::endl;
- }
- std::cout << std::endl;
- },
- buffers[current].read())
+ rg.emplace_task(
+ [](auto buf)
+ {
+ for(size_t x = 1; x < size.x; ++x)
+ {
+ for(size_t y = 1; y < size.y; ++y)
+ {
+ std::cout << ((buf[{x, y}] == ALIVE) ? "[47m" : "[100m") << " ";
+ }
+ std::cout << "[0m" << std::endl;
+ }
+ std::cout << std::endl;
+ },
+ buffers[current].read())
.get();
// calculate next step
for(size_t x = 1; x <= size.x; x += chunk_size.x)
for(size_t y = 1; y <= size.y; y += chunk_size.y)
- redGrapes::emplace_task(
+ rg.emplace_task(
[x, y](auto dst, auto src)
{
for(int xi = 0; xi < chunk_size.x; ++xi)
@@ -132,8 +130,6 @@ int main(int, char*[])
current = next;
}
- redGrapes::finalize();
-
SPDLOG_DEBUG("END!!!!");
return 0;
diff --git a/examples/mpi.cpp b/examples/mpi.cpp
index 94867e2c..8712c924 100644
--- a/examples/mpi.cpp
+++ b/examples/mpi.cpp
@@ -1,12 +1,21 @@
-#include
-#include
+/* Copyright 2019-2024 Michael Sippel, Tapish Narwal
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+#define ENABLE_WORKSTEALING 1
+
+#include
+#include
#include
#include
#include
-#include
-#include
+#include
+#include
+#include
-namespace rg = redGrapes;
+#include
/**
* This example shows how to use MPI with redGrapes.
@@ -28,78 +37,80 @@ namespace rg = redGrapes;
* For the iteration, double buffering is used.
*/
+enum SchedulerTags
+{
+ SCHED_MPI,
+ SCHED_CUDA
+};
+
struct MPIConfig
{
int world_rank;
int world_size;
};
+struct MPITag
+{
+};
+
+struct UselessWorkers
+{
+};
+
int main()
{
spdlog::set_pattern("[thread %t] %^[%l]%$ %v");
spdlog::set_level(spdlog::level::trace);
+ using RGTask = redGrapes::Task<>;
- /*
- int prov;
- MPI_Init_thread( nullptr, nullptr, MPI_THREAD_MULTIPLE, &prov );
- assert( prov == MPI_THREAD_MULTIPLE );
- */
-
- MPI_Init(nullptr, nullptr);
-
- auto default_scheduler = std::make_shared();
- auto mpi_request_pool = std::make_shared();
-
- hwloc_obj_t obj = hwloc_get_obj_by_type(redGrapes::SingletonContext::get().hwloc_ctx.topology, HWLOC_OBJ_PU, 1);
- rg::memory::ChunkedBumpAlloc mpi_alloc(
- rg::memory::HwlocAlloc(redGrapes::SingletonContext::get().hwloc_ctx, obj));
- auto mpi_worker = std::make_shared(
- mpi_alloc,
- redGrapes::SingletonContext::get().hwloc_ctx,
- obj,
- 4);
-
- // initialize main thread to execute tasks from the mpi-queue and poll
- rg::SingletonContext::get().idle = [mpi_worker, mpi_request_pool]
- {
- mpi_request_pool->poll();
+ auto rg = redGrapes::init(
+ redGrapes::SchedulerDescription(
+ std::make_shared>>(
+ 17),
+ UselessWorkers{}),
+ redGrapes::SchedulerDescription(
+ std::make_shared>>(
+ 4),
+ redGrapes::DefaultTag{}),
+ redGrapes::SchedulerDescription(
+ std::make_shared>(),
+ MPITag{}));
- redGrapes::Task* task;
- if(task = mpi_worker->ready_queue.pop())
- redGrapes::SingletonContext::get().execute_task(*task);
+ auto& mpiSched = rg.getScheduler();
- while(mpi_worker->init_dependencies(task, true))
- if(task)
- {
- redGrapes::SingletonContext::get().execute_task(*task);
- break;
- }
- };
+ auto mpi_request_pool = mpiSched.getRequestPool();
- rg::init(4, rg::scheduler::make_tag_match_scheduler().add({}, default_scheduler).add({SCHED_MPI}, mpi_worker));
+ int prov;
+
+ // initialize MPI
+ rg.emplace_task(
+ [&prov]()
+ {
+ MPI_Init_thread(nullptr, nullptr, MPI_THREAD_FUNNELED, &prov);
+ assert(prov == MPI_THREAD_FUNNELED);
+ });
// initialize MPI config
- rg::IOResource mpi_config;
- rg::emplace_task(
+ redGrapes::IOResource mpi_config;
+ rg.emplace_task(
[](auto config)
{
MPI_Comm_rank(MPI_COMM_WORLD, &config->world_rank);
MPI_Comm_size(MPI_COMM_WORLD, &config->world_size);
},
- mpi_config.write())
- .scheduling_tags(std::bitset<64>().set(SCHED_MPI));
+ mpi_config.write());
// main loop
- rg::FieldResource> field[2] = {
- rg::FieldResource>(new std::array()),
- rg::FieldResource>(new std::array()),
+ redGrapes::FieldResource, RGTask> field[2] = {
+ redGrapes::FieldResource, RGTask>(new std::array()),
+ redGrapes::FieldResource, RGTask>(new std::array()),
};
int current = 0;
// initialize
- rg::emplace_task(
+ rg.emplace_task(
[](auto buf, auto mpi_config)
{
int offset = 3 * mpi_config->world_rank;
@@ -109,7 +120,7 @@ int main()
field[current].write(),
mpi_config.read());
- for(size_t i = 0; i < 1; ++i)
+ for(size_t j = 0; j < 4; ++j)
{
int next = (current + 1) % 2;
@@ -118,45 +129,43 @@ int main()
*/
// Send
- rg::emplace_task(
- [i, current, mpi_request_pool](auto field, auto mpi_config)
- {
- int dst = (mpi_config->world_rank + 1) % mpi_config->world_size;
-
- MPI_Request request;
- MPI_Isend(&field[{3}], sizeof(int), MPI_CHAR, dst, current, MPI_COMM_WORLD, &request);
-
- mpi_request_pool->get_status(request);
- },
- field[current].at({3}).read(),
- mpi_config.read())
- .scheduling_tags({SCHED_MPI})
+ rg.emplace_task(
+ [current, mpi_request_pool](auto field, auto mpi_config)
+ {
+ int dst = (mpi_config->world_rank + 1) % mpi_config->world_size;
+
+ MPI_Request request;
+ MPI_Isend(&field[{3}], sizeof(int), MPI_CHAR, dst, current, MPI_COMM_WORLD, &request);
+
+ mpi_request_pool->get_status(request);
+ },
+ field[current].at({3}).read(),
+ mpi_config.read())
.enable_stack_switching();
// Receive
- rg::emplace_task(
- [i, current, mpi_request_pool](auto field, auto mpi_config)
- {
- int src = (mpi_config->world_rank - 1) % mpi_config->world_size;
+ rg.emplace_task(
+ [current, mpi_request_pool](auto field, auto mpi_config)
+ {
+ int src = (mpi_config->world_rank - 1) % mpi_config->world_size;
- MPI_Request request;
- MPI_Irecv(&field[{0}], sizeof(int), MPI_CHAR, src, current, MPI_COMM_WORLD, &request);
+ MPI_Request request;
+ MPI_Irecv(&field[{0}], sizeof(int), MPI_CHAR, src, current, MPI_COMM_WORLD, &request);
- MPI_Status status = mpi_request_pool->get_status(request);
+ MPI_Status status = mpi_request_pool->get_status(request);
- int recv_data_count;
- MPI_Get_count(&status, MPI_CHAR, &recv_data_count);
- },
- field[current].at({0}).write(),
- mpi_config.read())
- .scheduling_tags({SCHED_MPI})
+ int recv_data_count;
+ MPI_Get_count(&status, MPI_CHAR, &recv_data_count);
+ },
+ field[current].at({0}).write(),
+ mpi_config.read())
.enable_stack_switching();
/*
* Compute iteration
*/
for(size_t i = 1; i < field[current]->size(); ++i)
- rg::emplace_task(
+ rg.emplace_task(
[i](auto dst, auto src) { dst[{i}] = src[{i - 1}]; },
field[next].at({i}).write(),
field[current].at({i - 1}).read());
@@ -164,10 +173,10 @@ int main()
/*
* Write Output
*/
- rg::emplace_task(
- [i](auto buf, auto mpi_config)
+ rg.emplace_task(
+ [j](auto buf, auto mpi_config)
{
- std::cout << "Step[" << i << "], rank[" << mpi_config->world_rank << "] :: ";
+ std::cout << "Step[" << j << "], rank[" << mpi_config->world_rank << "] :: ";
for(size_t i = 0; i < buf->size(); ++i)
std::cout << buf[{i}] << "; ";
std::cout << std::endl;
@@ -178,7 +187,5 @@ int main()
current = next;
}
- rg::emplace_task([](auto m) { MPI_Finalize(); }, mpi_config.write()).scheduling_tags({SCHED_MPI});
-
- rg::finalize();
+ rg.emplace_task([]([[maybe_unused]] auto m) { MPI_Finalize(); }, mpi_config.write());
}
diff --git a/redGrapes/SchedulerDescription.hpp b/redGrapes/SchedulerDescription.hpp
new file mode 100644
index 00000000..279330e3
--- /dev/null
+++ b/redGrapes/SchedulerDescription.hpp
@@ -0,0 +1,40 @@
+/* Copyright 2024 Tapish Narwal
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#pragma once
+
+#include
+
+namespace redGrapes
+{
+
+ struct DefaultTag
+ {
+ };
+
+ template
+ concept C_Exec = requires(T execDesc)
+ {
+ typename T::Key;
+ typename T::ValueType;
+ {execDesc.scheduler};
+ };
+
+ template
+ struct SchedulerDescription
+ {
+ using Key = TTag;
+ using ValueType = TScheduler;
+
+ SchedulerDescription(std::shared_ptr scheduler, TTag = DefaultTag{}) : scheduler{scheduler}
+ {
+ }
+
+ std::shared_ptr scheduler;
+ };
+
+} // namespace redGrapes
diff --git a/redGrapes/TaskCtx.hpp b/redGrapes/TaskCtx.hpp
new file mode 100644
index 00000000..42e5a34c
--- /dev/null
+++ b/redGrapes/TaskCtx.hpp
@@ -0,0 +1,84 @@
+/* Copyright 2024 Tapish Narwal
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#pragma once
+
+#include "redGrapes/TaskFreeCtx.hpp"
+#include "redGrapes/scheduler/event.hpp"
+#include "redGrapes/task/task_space.hpp"
+
+#include
+
+#include
+
+namespace redGrapes
+{
+ template
+ struct TaskCtx
+ {
+ //! pause the currently running task at least until event is reached
+ // the else branch is supposed to run when .get() is called on an emplaced task, which calls the future's .get(), so there
+ // is no current task at that time, unless this is in a child task space. We can assert(event.task != 0);
+ static void yield(scheduler::EventPtr event)
+ {
+ if(current_task)
+ {
+ while(!event->is_reached())
+ current_task->yield(event);
+ }
+ else
+ {
+ event->waker_id = event.task->scheduler_p->getNextWorkerID() + 1;
+ while(!event->is_reached())
+ TaskFreeCtx::idle();
+ }
+ }
+
+ /*! Create an event on which the termination of the current task depends.
+ * A task must currently be running.
+ *
+ * @return Handle to flag the event with `reach_event` later.
+ * nullopt if there is no task running currently
+ */
+ static std::optional> create_event()
+ {
+ if(current_task)
+ return current_task->make_event();
+ else
+ return std::nullopt;
+ }
+
+ static std::shared_ptr> current_task_space()
+ {
+ if(current_task)
+ {
+ if(!current_task->children)
+ {
+ auto task_space = std::make_shared>(current_task);
+ SPDLOG_TRACE("create child space = {}", (void*) task_space.get());
+ current_task->children = task_space;
+ }
+
+ return current_task->children;
+ }
+ else
+ return root_space;
+ }
+
+ static unsigned scope_depth()
+ {
+ if(auto ts = current_task_space())
+ return ts->depth;
+ else
+ return 0;
+ }
+
+ static inline thread_local TTask* current_task;
+ static inline std::shared_ptr> root_space;
+ };
+
+} // namespace redGrapes
diff --git a/redGrapes/TaskFreeCtx.hpp b/redGrapes/TaskFreeCtx.hpp
new file mode 100644
index 00000000..d32bd212
--- /dev/null
+++ b/redGrapes/TaskFreeCtx.hpp
@@ -0,0 +1,46 @@
+/* Copyright 2024 Tapish Narwal
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#pragma once
+
+#include "redGrapes/memory/chunked_bump_alloc.hpp"
+#include "redGrapes/memory/hwloc_alloc.hpp"
+
+#include
+#include
+#include
+#include
+
+namespace redGrapes
+{
+
+ using WorkerId = unsigned;
+
+ // separated to not templatize allocators with Task type
+ struct WorkerAllocPool
+ {
+ public:
+ inline memory::ChunkedBumpAlloc& get_alloc(WorkerId worker_id)
+ {
+ assert(worker_id < allocs.size());
+ return allocs[worker_id];
+ }
+
+ std::vector> allocs;
+ };
+
+ struct TaskFreeCtx
+ {
+ static inline unsigned n_workers;
+ static inline unsigned n_pus;
+ static inline HwlocContext hwloc_ctx;
+ static inline std::shared_ptr worker_alloc_pool;
+
+ static inline thread_local std::function idle = [] {};
+ static inline thread_local std::optional current_worker_id;
+ };
+} // namespace redGrapes
diff --git a/redGrapes/dispatch/cuda/task_properties.hpp b/redGrapes/dispatch/cuda/cuda_task_properties.hpp
similarity index 76%
rename from redGrapes/dispatch/cuda/task_properties.hpp
rename to redGrapes/dispatch/cuda/cuda_task_properties.hpp
index e8532fc0..f20634e6 100644
--- a/redGrapes/dispatch/cuda/task_properties.hpp
+++ b/redGrapes/dispatch/cuda/cuda_task_properties.hpp
@@ -1,4 +1,4 @@
-/* Copyright 2020 Michael Sippel
+/* Copyright 2024 Tapish Narwal
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
@@ -7,6 +7,8 @@
#pragma once
+#include
+
namespace redGrapes
{
namespace dispatch
@@ -16,7 +18,7 @@ namespace redGrapes
struct CudaTaskProperties
{
- std::optional cuda_event;
+ std::optional m_cuda_stream_idx;
CudaTaskProperties()
{
@@ -30,6 +32,12 @@ namespace redGrapes
Builder(PropertiesBuilder& b) : builder(b)
{
}
+
+ PropertiesBuilder& cuda_stream_index(unsigned cuda_stream_idx)
+ {
+ builder.task->m_cuda_stream_idx = cuda_stream_idx; // assign the optional itself: writing through operator* of an empty optional is UB and leaves it disengaged
+ return builder;
+ }
};
struct Patch
diff --git a/redGrapes/dispatch/cuda/cuda_worker.hpp b/redGrapes/dispatch/cuda/cuda_worker.hpp
new file mode 100644
index 00000000..ee861848
--- /dev/null
+++ b/redGrapes/dispatch/cuda/cuda_worker.hpp
@@ -0,0 +1,267 @@
+/* Copyright 2024 Tapish Narwal
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#pragma once
+
+#include "redGrapes/TaskCtx.hpp"
+#include "redGrapes/dispatch/cuda/cuda_task_properties.hpp"
+#include "redGrapes/dispatch/cuda/event_pool.hpp"
+#include "redGrapes/scheduler/event.hpp"
+#include "redGrapes/sync/cv.hpp"
+#include "redGrapes/task/queue.hpp"
+
+#include
+#include
+#include
+
+#include
+
+namespace redGrapes::dispatch::cuda
+{
+ struct CudaStreamWrapper
+ {
+ cudaStream_t cuda_stream;
+
+ CudaStreamWrapper()
+ {
+ cudaStreamCreate(&cuda_stream);
+ }
+
+ CudaStreamWrapper(CudaStreamWrapper const& other)
+ {
+ spdlog::warn("CudaStreamWrapper copy constructor called!");
+ }
+
+ ~CudaStreamWrapper()
+ {
+ cudaStreamDestroy(cuda_stream);
+ }
+ };
+
+ // this class is not thread safe
+ // Stream dispatcher
+ template
+ struct CudaWorker
+
+ {
+ using task_type = TTask;
+
+ WorkerId id;
+ std::vector streams;
+ EventPool event_pool;
+
+ /*! if true, the thread shall stop
+ * instead of waiting when it is out of jobs
+ */
+ std::atomic_bool m_stop{false};
+ std::atomic task_count{0};
+
+
+ std::queue>> events;
+ std::recursive_mutex mutex;
+
+ //! condition variable for waiting if queue is empty
+ CondVar cv;
+
+ static constexpr size_t queue_capacity = 128;
+ task::Queue emplacement_queue{queue_capacity};
+ task::Queue ready_queue{queue_capacity};
+
+ CudaWorker(WorkerId worker_id) : id(worker_id)
+ {
+ }
+
+ CudaWorker(WorkerId worker_id, unsigned num_streams) : id{worker_id}, streams{num_streams}
+ {
+ }
+
+ inline scheduler::WakerId get_waker_id()
+ {
+ return id + 1;
+ }
+
+ inline bool wake()
+ {
+ return cv.notify();
+ }
+
+ void stop()
+ {
+ SPDLOG_TRACE("Worker::stop()");
+ m_stop.store(true, std::memory_order_release);
+ wake();
+ }
+
+ /* adds a new task to the emplacement queue
+ * and wakes up thread to kickstart execution
+ */
+ inline void dispatch_task(TTask& task)
+ {
+ emplacement_queue.push(&task);
+ wake();
+ }
+
+ inline void execute_task(TTask& task)
+ {
+ TRACE_EVENT("Worker", "dispatch task");
+
+ SPDLOG_DEBUG("cuda thread dispatch: execute task {}", task.task_id);
+ assert(task.is_ready());
+ std::lock_guard lock(mutex);
+
+ TaskCtx::current_task = &task;
+
+ // run the code that calls the CUDA API and submits work to *task->m_cuda_stream_idx
+ auto event = task();
+
+ cudaEvent_t cuda_event = event_pool.alloc();
+ cudaEventRecord(cuda_event, streams[*(task->m_cuda_stream_idx)].cuda_stream);
+ auto my_event = TaskCtx::create_event();
+ events.push(std::make_pair(cuda_event, *my_event));
+ SPDLOG_TRACE(
+ "CudaStreamDispatcher {}: recorded event {}",
+ streams[*(task->m_cuda_stream_idx)].cuda_stream,
+ cuda_event);
+
+ // TODO figure out the correct position for this
+ task.get_pre_event().notify();
+
+ if(event)
+ {
+ event->get_event().waker_id = get_waker_id();
+ task.sg_pause(*event);
+
+ task.pre_event.up();
+ task.get_pre_event().notify();
+ }
+ else
+ task.get_post_event().notify();
+
+ TaskCtx::current_task = nullptr;
+ }
+
+ /* repeatedly try to find and execute tasks
+ * until stop-flag is triggered by stop()
+ */
+ void work_loop()
+ {
+ SPDLOG_TRACE("Worker {} start work_loop()", this->id);
+ while(!this->m_stop.load(std::memory_order_consume))
+ {
+ // this->cv.wait(); // TODO fix this by fixing event_ptr notify to wake
+
+ while(TTask* task = this->gather_task())
+ {
+ execute_task(*task);
+ poll(); // TODO fix where to poll
+ }
+ poll();
+ }
+ SPDLOG_TRACE("Worker {} end work_loop()", this->id);
+ }
+
+ /* find a task that shall be executed next
+ */
+ TTask* gather_task()
+ {
+ {
+ TRACE_EVENT("Worker", "gather_task()");
+ TTask* task = nullptr;
+
+ /* STAGE 1:
+ *
+ * first, execute all tasks in the ready queue
+ */
+ SPDLOG_TRACE("Worker {}: consume ready queue", id);
+ if((task = ready_queue.pop()))
+ return task;
+
+ /* STAGE 2:
+ *
+ * after the ready queue is fully consumed,
+ * try initializing new tasks until one
+ * of them is found to be ready
+ */
+ SPDLOG_TRACE("Worker {}: try init new tasks", id);
+ while(this->init_dependencies(task, true))
+ if(task)
+ return task;
+
+ return task;
+ }
+ }
+
+ /*! take a task from the emplacement queue and initialize it,
+ * @param t is set to the task if the new task is ready,
+ * @param t is set to nullptr if the new task is blocked.
+ * @param claimed if set, the new task will not be activated,
+ * if it is false, activate_task will be called by notify_event
+ *
+ * @return false if queue is empty
+ */
+ bool init_dependencies(TTask*& t, bool claimed = true)
+ {
+ {
+ TRACE_EVENT("Worker", "init_dependencies()");
+ if(TTask* task = emplacement_queue.pop())
+ {
+ SPDLOG_DEBUG("init task {}", task->task_id);
+
+ task->pre_event.up();
+ task->init_graph();
+
+ if(task->get_pre_event().notify(claimed))
+ t = task;
+ else
+ {
+ t = nullptr;
+ }
+
+ return true;
+ }
+ else
+ return false;
+ }
+ }
+
+ //! checks if some cuda calls finished and notify the redGrapes manager
+ void poll()
+ {
+ std::lock_guard lock(mutex);
+ if(!events.empty())
+ {
+ auto& cuda_event = events.front().first;
+ auto& event = events.front().second;
+
+ if(cudaEventQuery(cuda_event) == cudaSuccess)
+ {
+ SPDLOG_TRACE("cuda event {} ready", cuda_event);
+ event_pool.free(cuda_event);
+ event.notify();
+
+ events.pop();
+ }
+ }
+ }
+ };
+
+} // namespace redGrapes::dispatch::cuda
+
+template<>
+struct fmt::formatter
+{
+ constexpr auto parse(format_parse_context& ctx)
+ {
+ return ctx.begin();
+ }
+
+ template
+ auto format(redGrapes::dispatch::cuda::CudaTaskProperties const& prop, FormatContext& ctx)
+ {
+ return prop.m_cuda_stream_idx ? fmt::format_to(ctx.out(), "\"cuda_stream_idx\" : {}", *(prop.m_cuda_stream_idx)) : fmt::format_to(ctx.out(), "\"cuda_stream_idx\" : null"); // an unset stream index prints null instead of dereferencing an empty optional
+ }
+};
diff --git a/redGrapes/dispatch/cuda/event_pool.hpp b/redGrapes/dispatch/cuda/event_pool.hpp
index a8123117..87aabec1 100644
--- a/redGrapes/dispatch/cuda/event_pool.hpp
+++ b/redGrapes/dispatch/cuda/event_pool.hpp
@@ -1,4 +1,4 @@
-/* Copyright 2020 Michael Sippel
+/* Copyright 2020-2024 Michael Sippel, Tapish Narwal
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
@@ -7,6 +7,8 @@
#pragma once
+#include
+
#include
#include
@@ -28,12 +30,6 @@ namespace redGrapes
{
}
- static EventPool& get()
- {
- static EventPool singleton;
- return singleton;
- }
-
~EventPool()
{
std::lock_guard lock(mutex);
diff --git a/redGrapes/dispatch/cuda/scheduler.hpp b/redGrapes/dispatch/cuda/scheduler.hpp
deleted file mode 100644
index 20d8f4a4..00000000
--- a/redGrapes/dispatch/cuda/scheduler.hpp
+++ /dev/null
@@ -1,194 +0,0 @@
-/* Copyright 2020 Michael Sippel
- *
- * This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/.
- */
-
-#pragma once
-
-#include
-#include
-#include
-#include
-#include
-
-#include
-#include
-
-#include
-#include
-#include
-#include
-#include
-
-namespace redGrapes
-{
- namespace dispatch
- {
- namespace cuda
- {
-
- thread_local cudaStream_t current_stream;
-
- // this class is not thread safe
- template
- struct CudaStreamDispatcher
- {
- cudaStream_t cuda_stream;
- std::recursive_mutex mutex;
- std::queue> events;
-
- CudaStreamDispatcher()
- {
- cudaStreamCreate(&cuda_stream);
- }
-
- CudaStreamDispatcher(CudaStreamDispatcher const& other)
- {
- spdlog::warn("CudaStreamDispatcher copy constructor called!");
- }
-
- ~CudaStreamDispatcher()
- {
- cudaStreamDestroy(cuda_stream);
- }
-
- void poll()
- {
- std::lock_guard lock(mutex);
- if(!events.empty())
- {
- auto& cuda_event = events.front().first;
- auto& event = events.front().second;
-
- if(cudaEventQuery(cuda_event) == cudaSuccess)
- {
- SPDLOG_TRACE("cuda event {} ready", cuda_event);
- EventPool::get().free(cuda_event);
- event.notify();
-
- events.pop();
- }
- }
- }
-
- void dispatch_task(Task& task)
- {
- std::lock_guard lock(mutex);
-
- for(auto predecessor : task.in_edges)
- {
- SPDLOG_TRACE("cudaDispatcher: consider predecessor \"{}\"", predecessor->label);
-
- if(auto cuda_event = predecessor->cuda_event)
- {
- SPDLOG_TRACE(
- "cudaDispatcher: task {} \"{}\" wait for {}",
- task.task_id,
- task.label,
- *cuda_event);
-
- cudaStreamWaitEvent(cuda_stream, *cuda_event, 0);
- }
- }
-
- SPDLOG_TRACE("CudaScheduler: start {}", task_id);
-
- current_stream = cuda_stream;
-
- // run the code that calls the CUDA API and submits work to current_stream
- task->run();
-
- cudaEvent_t cuda_event = EventPool::get().alloc();
- cudaEventRecord(cuda_event, cuda_stream);
- task->cuda_event = cuda_event;
-
- task->get_pre_event().notify();
-
- SPDLOG_TRACE("CudaStreamDispatcher {}: recorded event {}", cuda_stream, cuda_event);
- events.push(std::make_pair(cuda_event, task->get_post_event()));
- }
- };
-
- struct CudaScheduler : redGrapes::scheduler::IScheduler
- {
- private:
- bool recording;
- bool cuda_graph_enabled;
-
- std::recursive_mutex mutex;
- unsigned int current_stream;
- std::vector> streams;
-
- std::function is_cuda_task;
-
- public:
- CudaScheduler(
- std::function is_cuda_task,
- size_t stream_count = 1,
- bool cuda_graph_enabled = false)
- : is_cuda_task(is_cuda_task)
- , current_stream(0)
- , cuda_graph_enabled(cuda_graph_enabled)
- {
- // reserve to avoid copy constructor of CudaStreamDispatcher
- streams.reserve(stream_count);
-
- for(size_t i = 0; i < stream_count; ++i)
- streams.emplace_back();
-
- SPDLOG_TRACE("CudaScheduler: use {} streams", streams.size());
- }
-
- //! submits the call to the cuda runtime
- void activate_task(Task& task)
- {
- unsigned int stream_id = current_stream;
- current_stream = (current_stream + 1) % streams.size();
-
- SPDLOG_TRACE("Dispatch Cuda task {} \"{}\" on stream {}", task.task_id, task.label, stream_id);
- streams[stream_id].dispatch_task(task);
- }
-
- //! checks if some cuda calls finished and notify the redGrapes manager
- void poll()
- {
- for(size_t stream_id = 0; stream_id < streams.size(); ++stream_id)
- streams[stream_id].poll();
- }
-
- /*! whats the task dependency type for the edge a -> b (task a precedes task b)
- * @return true if task b depends on the pre event of task a, false if task b depends on the post event
- * of task b.
- */
- bool task_dependency_type(Task const& a, Task const& b)
- {
- assert(is_cuda_task(b));
- return is_cuda_task(a);
- }
- };
-
- } // namespace cuda
-
- } // namespace dispatch
-
-} // namespace redGrapes
-
-template<>
-struct fmt::formatter
-{
- constexpr auto parse(format_parse_context& ctx)
- {
- return ctx.begin();
- }
-
- template
- auto format(redGrapes::dispatch::cuda::CudaTaskProperties const& prop, FormatContext& ctx)
- {
- if(auto e = prop.cuda_event)
- return fmt::format_to(ctx.out(), "\"cuda_event\" : {}", *e);
- else
- return fmt::format_to(ctx.out(), "\"cuda_event\" : null");
- }
-};
diff --git a/redGrapes/dispatch/cupla/scheduler.hpp b/redGrapes/dispatch/cupla/scheduler.hpp
index 7491567c..17bb36df 100644
--- a/redGrapes/dispatch/cupla/scheduler.hpp
+++ b/redGrapes/dispatch/cupla/scheduler.hpp
@@ -1,4 +1,4 @@
-/* Copyright 2020 Michael Sippel
+/* Copyright 2020-2024 Michael Sippel, Tapish Narwal
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
@@ -7,11 +7,9 @@
#pragma once
-#include
-#include
-#include
-#include
-#include
+#include "redGrapes/dispatch/cupla/event_pool.hpp"
+#include "redGrapes/dispatch/cupla/task_properties.hpp"
+#include "redGrapes/scheduler/scheduler.hpp"
#include
#include
@@ -111,7 +109,8 @@ namespace redGrapes
}
};
- struct CuplaScheduler : redGrapes::scheduler::IScheduler
+ template
+ struct CuplaScheduler : redGrapes::scheduler::IScheduler
{
private:
bool recording;
@@ -119,13 +118,13 @@ namespace redGrapes
std::recursive_mutex mutex;
unsigned int current_stream;
- std::vector> streams;
+ std::vector> streams;
- std::function is_cupla_task;
+ std::function is_cupla_task;
public:
CuplaScheduler(
- std::function is_cupla_task,
+ std::function is_cupla_task,
size_t stream_count = 1,
bool cupla_graph_enabled = false)
: is_cupla_task(is_cupla_task)
@@ -142,7 +141,7 @@ namespace redGrapes
}
//! submits the call to the cupla runtime
- void activate_task(Task& task)
+ void activate_task(TTask& task)
{
unsigned int stream_id = current_stream;
current_stream = (current_stream + 1) % streams.size();
@@ -162,7 +161,7 @@ namespace redGrapes
* @return true if task b depends on the pre event of task a, false if task b depends on the post event
* of task b.
*/
- bool task_dependency_type(Task const& a, Task const& b)
+ bool task_dependency_type(TTask const& a, TTask const& b)
{
assert(is_cupla_task(b));
return is_cupla_task(a);
diff --git a/redGrapes/dispatch/dispatcher.hpp b/redGrapes/dispatch/dispatcher.hpp
deleted file mode 100644
index 5896a741..00000000
--- a/redGrapes/dispatch/dispatcher.hpp
+++ /dev/null
@@ -1,9 +0,0 @@
-/*
-struct IDispatcher
-{
- virtual ~IDispatcher() = 0;
-
- virtual void dispatch_task( TaskVertexPtr task_vertex ) = 0;
- virtual void notify() = 0;
-};
-*/
diff --git a/redGrapes/dispatch/mpi/mpiWorker.hpp b/redGrapes/dispatch/mpi/mpiWorker.hpp
new file mode 100644
index 00000000..7230161a
--- /dev/null
+++ b/redGrapes/dispatch/mpi/mpiWorker.hpp
@@ -0,0 +1,191 @@
+/* Copyright 2024 Tapish Narwal
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#pragma once
+#include "redGrapes/TaskFreeCtx.hpp"
+#include "redGrapes/dispatch/mpi/request_pool.hpp"
+#include "redGrapes/sync/cv.hpp"
+#include "redGrapes/task/queue.hpp"
+
+#include
+
+namespace redGrapes
+{
+ namespace dispatch
+ {
+ namespace mpi
+ {
+
+ template
+ struct MPIWorker
+ {
+ using task_type = TTask;
+ std::shared_ptr> requestPool;
+ WorkerId id;
+
+ /*! if true, the thread shall stop
+ * instead of waiting when it is out of jobs
+ */
+ std::atomic_bool m_stop{false};
+ std::atomic task_count{0};
+
+ //! condition variable for waiting if queue is empty
+ CondVar cv;
+
+ static constexpr size_t queue_capacity = 128;
+ task::Queue