Skip to content

Commit

Permalink
Merge pull request #25 from koide3/tbb
Browse files Browse the repository at this point in the history
TBB backend
  • Loading branch information
koide3 authored Sep 23, 2024
2 parents 51df146 + f2bb272 commit 0997e0b
Show file tree
Hide file tree
Showing 35 changed files with 1,451 additions and 594 deletions.
32 changes: 27 additions & 5 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,30 +12,43 @@ endif()
# User-facing build switches; override with -D<NAME>=ON|OFF at configure time.
option(BUILD_TESTS "Build test" OFF)
option(BUILD_DEMO "Build demo programs" OFF)
option(BUILD_EXAMPLE "Build example programs" OFF)
# Parallelism backends: each enabled backend is looked up as a REQUIRED package further below.
option(BUILD_WITH_TBB "Build with TBB support" ON)
option(BUILD_WITH_OPENMP "Build with OpenMP support" ON)
# GPU support; when ON the CUDA toolkit is looked up as a REQUIRED package.
option(BUILD_WITH_CUDA "Build with GPU support" OFF)
option(BUILD_WITH_CUDA_MULTIARCH "Build with CUDA multi-architecture support" OFF)
# Adds -march=native to the compile flags (host-specific binaries).
option(BUILD_WITH_MARCH_NATIVE "Build with -march=native" OFF)
# Developer tooling switches.
option(ENABLE_CPPCHECK "Enable cppcheck" OFF)
option(ENABLE_COVERAGE "Enable coverage check" OFF)

# Optional host-CPU tuning.
# GTSAM_POINTS_WITH_MARCH_NATIVE is set so the choice can be recorded through the
# matching #cmakedefine entry of config.hpp.in.
if(BUILD_WITH_MARCH_NATIVE)
set(GTSAM_POINTS_WITH_MARCH_NATIVE 1)
# NOTE(review): add_compile_options() is not language-specific, so this flag presumably
# also reaches CUDA sources when BUILD_WITH_CUDA is ON — confirm the CUDA compiler accepts
# it, or restrict it with a $<COMPILE_LANGUAGE:...> generator expression.
add_compile_options(-march=native)
# The same flag is additionally prepended to the global C and C++ flag strings.
set(CMAKE_C_FLAGS "-march=native ${CMAKE_C_FLAGS}")
set(CMAKE_CXX_FLAGS "-march=native ${CMAKE_CXX_FLAGS}")
endif()

# Mandatory dependencies.
find_package(Boost REQUIRED COMPONENTS filesystem)
find_package(GTSAM REQUIRED)
# NOTE(review): this lookup is unconditional, so OpenMP stays REQUIRED even when
# BUILD_WITH_OPENMP is OFF and the guarded lookup below becomes redundant — confirm
# whether this line should be dropped together with the unconditional
# OpenMP::OpenMP_CXX link entries.
find_package(OpenMP REQUIRED)
find_package(Eigen3 REQUIRED)

# Optional parallelism backends. Each GTSAM_POINTS_USE_* variable feeds the
# #cmakedefine entry of the same name in config.hpp.in.
if(BUILD_WITH_TBB)
find_package(TBB REQUIRED)
set(GTSAM_POINTS_USE_TBB 1)
endif()

if(BUILD_WITH_OPENMP)
find_package(OpenMP REQUIRED)
set(GTSAM_POINTS_USE_OPENMP 1)
endif()

# Warn when CUDA support is requested together with an Eigen release older than 3.3.90.
#
# The variables are passed to if() by NAME rather than expanded with ${...}:
#  - an expanded version such as "3.3.7" is neither a boolean constant nor a defined
#    variable name, so if() evaluates it as false and the warning could never fire;
#  - an empty expansion would leave a malformed expression ("AND AND VERSION_LESS").
if(BUILD_WITH_CUDA AND EIGEN3_VERSION_STRING AND EIGEN3_VERSION_STRING VERSION_LESS "3.3.90")
  message(WARNING "Detected Eigen ${EIGEN3_VERSION_STRING} is not compatible with CUDA")
  message(WARNING "Use Eigen 3.3.90 or later (3.4.0 is recommended)")
endif()

# GPU-related
if(BUILD_WITH_CUDA)
set(GTSAM_POINTS_USE_CUDA 1)
add_definitions(-DBUILD_GTSAM_POINTS_GPU)

find_package(CUDAToolkit REQUIRED)
Expand Down Expand Up @@ -105,6 +118,8 @@ if(ENABLE_COVERAGE)
WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
endif()

# Generate config.hpp (version numbers and GTSAM_POINTS_USE_* feature macros) into the
# build tree. Paths are spelled out so the source/binary roots are explicit, and @ONLY
# restricts substitution to @VAR@ placeholders (#cmakedefine lines are still processed).
configure_file(
  "${CMAKE_CURRENT_SOURCE_DIR}/include/gtsam_points/config.hpp.in"
  "${CMAKE_CURRENT_BINARY_DIR}/include/gtsam_points/config.hpp"
  @ONLY
)

###########
## Build ##
###########
Expand All @@ -115,6 +130,7 @@ add_library(gtsam_points SHARED
src/gtsam_points/util/normal_estimation.cpp
src/gtsam_points/util/bspline.cpp
src/gtsam_points/util/continuous_trajectory.cpp
src/gtsam_points/util/parallelism.cpp
# ann
src/gtsam_points/ann/kdtree.cpp
src/gtsam_points/ann/intensity_kdtree.cpp
Expand Down Expand Up @@ -158,14 +174,16 @@ add_library(gtsam_points SHARED
# Public include paths of gtsam_points.
# Build tree: project headers, the bundled nanoflann headers, and the binary-dir include
# folder that receives the generated gtsam_points/config.hpp.
# Install tree: a single "include" folder relative to the install prefix.
target_include_directories(gtsam_points PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/nanoflann/include>
$<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}/include>
$<INSTALL_INTERFACE:include>
)
# Link dependencies of gtsam_points.
# TBB and OpenMP are linked only when their imported targets exist (i.e. when the
# corresponding find_package() call ran). OpenMP is therefore listed once, through the
# guarded generator expression, instead of a second unconditional entry.
# The plain (keyword-less) signature is kept because it must not be mixed with the
# keyword signature on the same target.
target_link_libraries(gtsam_points
  Boost::boost
  Boost::filesystem
  Eigen3::Eigen
  GTSAM::GTSAM
  $<TARGET_NAME_IF_EXISTS:TBB::tbb>
  $<TARGET_NAME_IF_EXISTS:OpenMP::OpenMP_CXX>
)

# GPU-related
Expand Down Expand Up @@ -206,11 +224,12 @@ if(BUILD_WITH_CUDA)
$<INSTALL_INTERFACE:include>
)
# Link dependencies of the CUDA library.
# Each dependency is listed exactly once; TBB and OpenMP are linked only when their
# imported targets exist, via the guarded generator expressions.
target_link_libraries(gtsam_points_cuda
  Boost::boost
  Eigen3::Eigen
  GTSAM::GTSAM
  CUDA::cudart
  $<TARGET_NAME_IF_EXISTS:TBB::tbb>
  $<TARGET_NAME_IF_EXISTS:OpenMP::OpenMP_CXX>
)

target_link_libraries(gtsam_points
Expand Down Expand Up @@ -274,7 +293,10 @@ endif()
#############

include(GNUInstallDirs)

# Install the public headers in one rule: project headers, the bundled nanoflann headers,
# and the build-tree include folder that holds the generated config.hpp.
# (A separate rule for the first two directories would only copy the same files twice.)
install(
  DIRECTORY
    include/
    thirdparty/nanoflann/include/
    ${CMAKE_CURRENT_BINARY_DIR}/include/
  DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
)

list(APPEND GTSAM_POINTS_LIBRARIES gtsam_points)
if(BUILD_WITH_CUDA)
Expand Down
4 changes: 3 additions & 1 deletion cmake/gtsam_points-config.cmake.in
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@

# Process this package configuration only once per scope.
include_guard()

# Feature switches captured when this file was generated from its template;
# a switch that was not set at that time expands to nothing and leaves the variable unset.
set(BUILD_WITH_CUDA @BUILD_WITH_CUDA@)
set(GTSAM_POINTS_USE_TBB @GTSAM_POINTS_USE_TBB@)
set(GTSAM_POINTS_USE_OPENMP @GTSAM_POINTS_USE_OPENMP@)
set(GTSAM_POINTS_USE_CUDA @GTSAM_POINTS_USE_CUDA@)

# Make the directory that contains this file searchable for CMake modules.
get_filename_component(
  gtsam_points_CURRENT_CONFIG_DIR
  "${CMAKE_CURRENT_LIST_FILE}"
  DIRECTORY
)
list(APPEND CMAKE_MODULE_PATH "${gtsam_points_CURRENT_CONFIG_DIR}")
Expand Down
13 changes: 12 additions & 1 deletion include/gtsam_points/ann/kdtree2.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,11 @@
#include <iostream>
#include <Eigen/Core>

#include <gtsam_points/config.hpp>
#include <gtsam_points/ann/small_kdtree.hpp>
#include <gtsam_points/types/frame_traits.hpp>
#include <gtsam_points/ann/nearest_neighbor_search.hpp>
#include <gtsam_points/util/parallelism.hpp>

namespace gtsam_points {

Expand All @@ -24,7 +26,16 @@ struct KdTree2 : public NearestNeighborSearch {
KdTree2(const std::shared_ptr<const Frame>& frame, int build_num_threads = 1)
: frame(frame),
search_eps(-1.0),
index(new Index(*this->frame, KdTreeBuilderOMP(build_num_threads))) {
index(
is_omp_default() || build_num_threads == 1 ? //
new Index(*this->frame, KdTreeBuilderOMP(build_num_threads)) //
: //
#ifdef GTSAM_POINTS_USE_TBB //
new Index(*this->frame, KdTreeBuilderTBB()) //
#else //
new Index(*this->frame, KdTreeBuilder())
#endif
) {
if (frame::size(*frame) == 0) {
std::cerr << "error: empty frame is given for KdTree2" << std::endl;
std::cerr << " : frame::size() may not be implemented" << std::endl;
Expand Down
76 changes: 76 additions & 0 deletions include/gtsam_points/ann/small_kdtree.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,14 @@
#include <numeric>
#include <Eigen/Core>

#include <gtsam_points/config.hpp>
#include <gtsam_points/ann/knn_result.hpp>
#include <gtsam_points/types/frame_traits.hpp>

#ifdef GTSAM_POINTS_USE_TBB
#include <tbb/parallel_invoke.h>
#endif

namespace gtsam_points {

/// @brief Parameters to control the projection axis search.
Expand Down Expand Up @@ -266,6 +271,77 @@ struct KdTreeBuilderOMP {
ProjectionSetting projection_setting; ///< Projection setting.
};

#ifdef GTSAM_POINTS_USE_TBB
/// @brief Kd-tree builder with TBB.
/// @note  Sub-trees holding more than a fixed number of points are built concurrently with
///        tbb::parallel_invoke; smaller sub-trees are built sequentially by the calling task.
struct KdTreeBuilderTBB {
public:
  /// @brief Build KdTree
  /// @param kdtree  Kd-tree to populate; its indices, nodes, and root are overwritten.
  /// @param points  Input point cloud, accessed through the frame traits.
  template <typename KdTree, typename PointCloud>
  void build_tree(KdTree& kdtree, const PointCloud& points) const {
    const size_t num_points = frame::size(points);

    kdtree.indices.resize(num_points);
    std::iota(kdtree.indices.begin(), kdtree.indices.end(), 0);

    // Node storage is allocated up front and slots are claimed through the atomic counter,
    // so concurrent create_node() calls never resize the vector.
    // At least one slot is reserved because create_node() always writes the root node,
    // even for an empty point cloud (which then yields a single empty leaf).
    // NOTE(review): assumes the tree never needs more nodes than points — holds for the
    // default max_leaf_size; confirm for very small leaf sizes.
    std::atomic_uint64_t node_count = 0;
    kdtree.nodes.resize(std::max<size_t>(num_points, 1));
    kdtree.root = create_node(kdtree, node_count, points, kdtree.indices.begin(), kdtree.indices.begin(), kdtree.indices.end());

    // Drop the unused tail of the pre-allocated storage.
    kdtree.nodes.resize(node_count);
  }

  /// @brief Create a Kd-tree node from the given point indices.
  /// @param kdtree       Kd-tree that owns the node storage.
  /// @param node_count   Shared counter used to claim the next free node slot.
  /// @param points       Input point cloud.
  /// @param global_first Global first point index iterator (i.e., this->indices.begin()).
  /// @param first        First point index iterator to be scanned.
  /// @param last         Last point index iterator to be scanned.
  /// @return             Index of the created node.
  template <typename PointCloud, typename KdTree, typename IndexConstIterator>
  NodeIndexType create_node(
    KdTree& kdtree,
    std::atomic_uint64_t& node_count,
    const PointCloud& points,
    IndexConstIterator global_first,
    IndexConstIterator first,
    IndexConstIterator last) const {
    // Sub-trees with more points than this are split across two TBB tasks.
    constexpr size_t min_parallel_size = 512;

    const size_t N = std::distance(first, last);
    // Post-increment returns the previous value, i.e. the slot claimed by this call.
    const NodeIndexType node_index = node_count++;
    auto& node = kdtree.nodes[node_index];

    // Create a leaf node.
    if (N <= static_cast<size_t>(max_leaf_size)) {
      // Leaves store their point range as offsets into the global index array.
      node.node_type.lr.first = std::distance(global_first, first);
      node.node_type.lr.last = std::distance(global_first, last);

      return node_index;
    }

    // Find the best axis to split the input points.
    using Projection = typename KdTree::Projection;
    const auto proj = Projection::find_axis(points, first, last, projection_setting);
    const auto median_itr = first + N / 2;
    // Partition the indices around the median of the projected values.
    std::nth_element(first, median_itr, last, [&](size_t i, size_t j) { return proj(frame::point(points, i)) < proj(frame::point(points, j)); });

    // Create a non-leaf node.
    node.node_type.sub.proj = proj;
    node.node_type.sub.thresh = proj(frame::point(points, *median_itr));

    // Create left and right child nodes.
    // The two halves cover disjoint index ranges and claim distinct node slots.
    if (N > min_parallel_size) {
      tbb::parallel_invoke(
        [&] { node.left = create_node(kdtree, node_count, points, global_first, first, median_itr); },
        [&] { node.right = create_node(kdtree, node_count, points, global_first, median_itr, last); });
    } else {
      node.left = create_node(kdtree, node_count, points, global_first, first, median_itr);
      node.right = create_node(kdtree, node_count, points, global_first, median_itr, last);
    }

    return node_index;
  }

public:
  int max_leaf_size = 20;                ///< Maximum number of points in a leaf node.
  ProjectionSetting projection_setting;  ///< Projection setting.
};
#endif

/// @brief "Unsafe" KdTree.
/// @note This class does not hold the ownership of the input points.
/// You must keep the input points along with this class.
Expand Down
18 changes: 18 additions & 0 deletions include/gtsam_points/config.hpp.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2021 Kenji Koide ([email protected])

#pragma once

// Library version.
// PROJECT_VERSION* refers to the project() call that owns this template, so the values
// stay correct when the library is built as a sub-project of another CMake project
// (CMAKE_PROJECT_VERSION* would report the top-level project's version instead).
#define GTSAM_POINTS_VERSION_MAJOR @PROJECT_VERSION_MAJOR@
#define GTSAM_POINTS_VERSION_MINOR @PROJECT_VERSION_MINOR@
#define GTSAM_POINTS_VERSION_PATCH @PROJECT_VERSION_PATCH@
#define GTSAM_POINTS_VERSION_STRING "@PROJECT_VERSION@"

// Feature macros: each one is defined only when the CMake variable of the same name
// was set to a true value at configure time.

// TBB backend enabled.
#cmakedefine GTSAM_POINTS_USE_TBB

// OpenMP backend enabled.
#cmakedefine GTSAM_POINTS_USE_OPENMP

// CUDA support enabled.
#cmakedefine GTSAM_POINTS_USE_CUDA

// Library compiled with -march=native.
#cmakedefine GTSAM_POINTS_WITH_MARCH_NATIVE
Loading

0 comments on commit 0997e0b

Please sign in to comment.