diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..477c6f3 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,209 @@ +cmake_minimum_required(VERSION 3.0.2) +project(gvom) + +## Compile as C++11, supported in ROS Kinetic and newer +# add_compile_options(-std=c++11) + +## Find catkin macros and libraries +## if COMPONENTS list like find_package(catkin REQUIRED COMPONENTS xyz) +## is used, also find other catkin packages +find_package(catkin REQUIRED COMPONENTS + nav_msgs + ros_numpy + rospy + sensor_msgs + tf + tf2_ros +) + +## System dependencies are found with CMake's conventions +# find_package(Boost REQUIRED COMPONENTS system) + + +## Uncomment this if the package has a setup.py. This macro ensures +## modules and global scripts declared therein get installed +## See http://ros.org/doc/api/catkin/html/user_guide/setup_dot_py.html +# catkin_python_setup() + +################################################ +## Declare ROS messages, services and actions ## +################################################ + +## To declare and build messages, services or actions from within this +## package, follow these steps: +## * Let MSG_DEP_SET be the set of packages whose message types you use in +## your messages/services/actions (e.g. std_msgs, actionlib_msgs, ...). +## * In the file package.xml: +## * add a build_depend tag for "message_generation" +## * add a build_depend and a exec_depend tag for each package in MSG_DEP_SET +## * If MSG_DEP_SET isn't empty the following dependency has been pulled in +## but can be declared for certainty nonetheless: +## * add a exec_depend tag for "message_runtime" +## * In this file (CMakeLists.txt): +## * add "message_generation" and every package in MSG_DEP_SET to +## find_package(catkin REQUIRED COMPONENTS ...) +## * add "message_runtime" and every package in MSG_DEP_SET to +## catkin_package(CATKIN_DEPENDS ...) +## * uncomment the add_*_files sections below as needed +## and list every .msg/.srv/.action file to be processed +## * uncomment the generate_messages entry below +## * add every package in MSG_DEP_SET to generate_messages(DEPENDENCIES ...) + +## Generate messages in the 'msg' folder +# add_message_files( +# FILES +# Message1.msg +# Message2.msg +# ) + +## Generate services in the 'srv' folder +# add_service_files( +# FILES +# Service1.srv +# Service2.srv +# ) + +## Generate actions in the 'action' folder +# add_action_files( +# FILES +# Action1.action +# Action2.action +# ) + +## Generate added messages and services with any dependencies listed here +# generate_messages( +# DEPENDENCIES +# nav_msgs# sensor_msgs +# ) + +################################################ +## Declare ROS dynamic reconfigure parameters ## +################################################ + +## To declare and build dynamic reconfigure parameters within this +## package, follow these steps: +## * In the file package.xml: +## * add a build_depend and a exec_depend tag for "dynamic_reconfigure" +## * In this file (CMakeLists.txt): +## * add "dynamic_reconfigure" to +## find_package(catkin REQUIRED COMPONENTS ...) +## * uncomment the "generate_dynamic_reconfigure_options" section below +## and list every .cfg file to be processed + +## Generate dynamic reconfigure parameters in the 'cfg' folder +# generate_dynamic_reconfigure_options( +# cfg/DynReconf1.cfg +# cfg/DynReconf2.cfg +# ) + +################################### +## catkin specific configuration ## +################################### +## The catkin_package macro generates cmake config files for your package +## Declare things to be passed to dependent projects +## INCLUDE_DIRS: uncomment this if your package contains header files +## LIBRARIES: libraries you create in this project that dependent projects also need +## CATKIN_DEPENDS: catkin_packages dependent projects also need +## DEPENDS: system dependencies of this project that dependent projects also need +catkin_package( +# INCLUDE_DIRS include +# LIBRARIES gvom +# CATKIN_DEPENDS nav_msgs ros_numpy rospy sensor_msgs tf tf2_ros +# DEPENDS system_lib +) + +########### +## Build ## +########### + +## Specify additional locations of header files +## Your package locations should be listed before other locations +include_directories( +# include + ${catkin_INCLUDE_DIRS} +) + +## Declare a C++ library +# add_library(${PROJECT_NAME} +# src/${PROJECT_NAME}/gvom.cpp +# ) + +## Add cmake target dependencies of the library +## as an example, code may need to be generated before libraries +## either from message generation or dynamic reconfigure +# add_dependencies(${PROJECT_NAME} ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS}) + +## Declare a C++ executable +## With catkin_make all packages are built within a single CMake context +## The recommended prefix ensures that target names across packages don't collide +# add_executable(${PROJECT_NAME}_node src/gvom_node.cpp) + +## Rename C++ executable without prefix +## The above recommended prefix causes long target names, the following renames the +## target back to the shorter version for ease of user use +## e.g. "rosrun someones_pkg node" instead of "rosrun someones_pkg someones_pkg_node" +# set_target_properties(${PROJECT_NAME}_node PROPERTIES OUTPUT_NAME node PREFIX "") + +## Add cmake target dependencies of the executable +## same as for the library above +# add_dependencies(${PROJECT_NAME}_node ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS}) + +## Specify libraries to link a library or executable target against +# target_link_libraries(${PROJECT_NAME}_node +# ${catkin_LIBRARIES} +# ) + +############# +## Install ## +############# + +# all install targets should use catkin DESTINATION variables +# See http://ros.org/doc/api/catkin/html/adv_user_guide/variables.html + +## Mark executable scripts (Python etc.) for installation +## in contrast to setup.py, you can choose the destination +# catkin_install_python(PROGRAMS +# scripts/my_python_script +# DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} +# ) + +## Mark executables for installation +## See http://docs.ros.org/melodic/api/catkin/html/howto/format1/building_executables.html +# install(TARGETS ${PROJECT_NAME}_node +# RUNTIME DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} +# ) + +## Mark libraries for installation +## See http://docs.ros.org/melodic/api/catkin/html/howto/format1/building_libraries.html +# install(TARGETS ${PROJECT_NAME} +# ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} +# LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} +# RUNTIME DESTINATION ${CATKIN_GLOBAL_BIN_DESTINATION} +# ) + +## Mark cpp header files for installation +# install(DIRECTORY include/${PROJECT_NAME}/ +# DESTINATION ${CATKIN_PACKAGE_INCLUDE_DESTINATION} +# FILES_MATCHING PATTERN "*.h" +# PATTERN ".svn" EXCLUDE +# ) + +## Mark other files for installation (e.g. launch and bag files, etc.) +# install(FILES +# # myfile1 +# # myfile2 +# DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION} +# ) + +############# +## Testing ## +############# + +## Add gtest based cpp test target and link libraries +# catkin_add_gtest(${PROJECT_NAME}-test test/test_gvom.cpp) +# if(TARGET ${PROJECT_NAME}-test) +# target_link_libraries(${PROJECT_NAME}-test ${PROJECT_NAME}) +# endif() + +## Add folders to be run by python nosetests +# catkin_add_nosetests(test) diff --git a/README.md b/README.md new file mode 100755 index 0000000..e5bdc27 --- /dev/null +++ b/README.md @@ -0,0 +1,16 @@ +# G-VOM +## A GPU Accelerated Voxel Off-Road Mapping System + + +## 1. Prerequisites +### 1.1 **Ubuntu** and **ROS** +Ubuntu 64-bit 20.04. +ROS Noetic. [ROS Installation](http://wiki.ros.org/ROS/Installation) + + +### 1.2. **Numba** +Follow [Numba Installation](https://numba.pydata.org/numba-doc/latest/user/installing.html) and [Numba CUDA](https://numba.pydata.org/numba-doc/latest/cuda/overview.html#setting-cuda-installation-path). + +##ROS Example +An example ROS implementation is provided in gvom_ros.py. + diff --git a/package.xml b/package.xml new file mode 100644 index 0000000..9aa850a --- /dev/null +++ b/package.xml @@ -0,0 +1,77 @@ + + + gvom + 1.0.0 + G-VOM: A GPU Accelerated Voxel Off-Road Mapping System + + + + + Timothy Overbye + + + + + + TODO + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + catkin + nav_msgs + ros_numpy + rospy + sensor_msgs + tf + tf2_ros + nav_msgs + ros_numpy + rospy + sensor_msgs + tf + tf2_ros + nav_msgs + ros_numpy + rospy + sensor_msgs + tf + tf2_ros + + + + + + + + diff --git a/scripts/gvom.py b/scripts/gvom.py new file mode 100755 index 0000000..d0425d3 --- /dev/null +++ b/scripts/gvom.py @@ -0,0 +1,1229 @@ +import numba +from numba import vectorize, jit, cuda +import numpy as np +import math +import time +import threading + +class Gvom: + + """ + A class to take lidar scanns and create a costmap\n + xy_resolution: x,y resolution in metters of each voxel\n + z_resolution: z resolution in metters of each voxel\n + xy_size: Number of voxels in x,y\n + z_size: Number of voxels in z\n + buffer_size: Number of lidar scans to keep in memory\n + min_distance: Minimum point distance, any points closer than this will be discarded\n + + """ + + def __init__(self, xy_resolution, z_resolution, xy_size, z_size, buffer_size, min_distance,positive_obstacle_threshold, + negative_obstacle_threshold, slope_obsacle_threshold, robot_height, robot_radius,ground_to_lidar_height): + + # print("init") + + self.xy_resolution = xy_resolution + self.z_resolution = z_resolution + + self.xy_size = xy_size + self.z_size = z_size + + self.buffer_size = buffer_size + + self.metrics = np.array([[3,2]]) + + self.min_distance = min_distance + + self.positive_obstacle_threshold = positive_obstacle_threshold + self.negative_obstacle_threshold = negative_obstacle_threshold + self.slope_obsacle_threshold = slope_obsacle_threshold + self.robot_height = robot_height + self.robot_radius = robot_radius + self.ground_to_lidar_height = ground_to_lidar_height + + self.index_buffer = [] + self.hit_count_buffer = [] + self.total_count_buffer = [] + self.metrics_buffer = [] + self.origin_buffer = [] + self.min_height_buffer = [] + self.semaphores = [] + + self.combined_index_map = None + self.combined_hit_count = None + self.combined_total_count = None + self.combined_min_height = None + self.combined_origin = None + self.combined_metrics = None + self.combined_cell_count_cpu = None + + self.height_map = None + self.inferred_height_map = None + self.roughness_map = None + self.guessed_height_delta = None + + self.buffer_index = 0 + self.last_buffer_index = 0 + + self.voxel_count = self.xy_size*self.xy_size*self.z_size + + for i in range(self.buffer_size): + self.index_buffer.append(None) + self.hit_count_buffer.append(None) + self.total_count_buffer.append(None) + self.metrics_buffer.append(None) + self.origin_buffer.append(None) + self.min_height_buffer.append(None) + self.semaphores.append(threading.Semaphore()) + + self.last_combined_index_map = None + self.last_combined_hit_count = None + self.last_combined_total_count = None + self.last_combined_min_height = None + self.last_combined_origin = None + self.last_combined_metrics = None + self.last_combined_cell_count_cpu = None + + self.threads_per_block = 256 + self.threads_per_block_3D = (8, 8, 4) + self.threads_per_block_2D = (16, 16) + + self.blocks = math.ceil(self.xy_size*self.xy_size*self.z_size / self.threads_per_block) + + self.ego_semaphore = threading.Semaphore() + self.ego_position = [0,0,0] + + + + def Process_pointcloud(self, pointcloud, ego_position, transform=None): + """ Imports a pointcloud and processes it into a voxel map then adds the map to the buffer""" + #pc_mem_time = time.time() + # Import pointcloud + # print("import") + self.ego_semaphore.acquire() + self.ego_position = ego_position + self.ego_semaphore.release() + + point_count = pointcloud.shape[0] + pointcloud = cuda.to_device(pointcloud) + + # Initilize arrays on GPU + # print("init") + cell_count = cuda.to_device(np.zeros([1], dtype=np.int32)) + # -1 = unknown, -1 < free space, >= 0 point index in shorter arrays + + + + index_map = cuda.device_array([self.xy_size*self.xy_size*self.z_size], dtype=np.int32) + self.__init_1D_array[self.blocks,self.threads_per_block](index_map,-1,self.xy_size*self.xy_size*self.z_size) + + tmp_hit_count = cuda.device_array([self.xy_size*self.xy_size*self.z_size], dtype=np.int32) + self.__init_1D_array[self.blocks,self.threads_per_block](tmp_hit_count,0,self.xy_size*self.xy_size*self.z_size) + + tmp_total_count = cuda.device_array([self.xy_size*self.xy_size*self.z_size], dtype=np.int32) + self.__init_1D_array[self.blocks,self.threads_per_block](tmp_total_count,0,self.xy_size*self.xy_size*self.z_size) + + #print(" mem setup rate = " + str(1.0 / (time.time() - pc_mem_time))) + + #mem_2_time = time.time() + + #ego_position = np.zeros([3]) + origin = np.zeros([3]) + origin[0] = math.floor((ego_position[0]/self.xy_resolution) - self.xy_size/2) + origin[1] = math.floor((ego_position[1]/self.xy_resolution) - self.xy_size/2) + origin[2] = math.floor((ego_position[2]/self.z_resolution) - self.z_size/2) + + # print(origin) + ego_position = cuda.to_device(ego_position) + origin = cuda.to_device(origin) + + blocks_pointcloud = int(np.ceil(point_count/self.threads_per_block)) + blocks_map = int(np.ceil(self.xy_size*self.xy_size * self.z_size/self.threads_per_block)) + + #print(" mem setup 2 rate = " + str(1.0 / (time.time() - mem_2_time))) + + # Transform pointcloud + #tf_time = time.time() + + if not transform is None: + self.__transform_pointcloud[blocks_pointcloud, self.threads_per_block]( + pointcloud, transform, point_count) + #print(" tf rate = " + str(1.0 / (time.time() - tf_time))) + + # Count points in each voxel and number of rays through each voxel + #count_time = time.time() + + self.__point_2_map[blocks_pointcloud, self.threads_per_block]( + self.xy_resolution, self.z_resolution, self.xy_size, self.z_size, self.min_distance, pointcloud, tmp_hit_count, tmp_total_count, point_count, ego_position, origin) + #print(" count rate = " + str(1.0 / (time.time() - count_time))) + + # Make index map so we only need to store data on non-empty voxels + #small_time = time.time() + + self.__assign_indices[blocks_map, self.threads_per_block](tmp_hit_count,tmp_total_count, index_map, cell_count, self.voxel_count) + + cell_count_cpu = cell_count.copy_to_host()[0] + hit_count = cuda.device_array([cell_count_cpu], dtype=np.int32) + total_count = cuda.device_array([cell_count_cpu], dtype=np.int32) + + # Move count to smaller array + self.__move_data[blocks_map, self.threads_per_block]( + tmp_hit_count, hit_count, index_map, self.voxel_count) + + self.__move_data[blocks_map, self.threads_per_block]( + tmp_total_count, total_count, index_map, self.voxel_count) + + #print(" move to small time = " + str(1.0 / (time.time() - small_time))) + + + # Calculate metrics + #met_time = time.time() + + metrics, min_height = self.__calculate_metrics_master( + pointcloud, point_count, hit_count, index_map, cell_count_cpu, origin) + #print(" metrics rate = " + str(1.0 / (time.time() - met_time))) + + + + # Assign data to buffer + #buf_time = time.time() + + # Block the main thread from accessing this buffer index wile we write to it + self.semaphores[self.buffer_index].acquire() + + self.index_buffer[self.buffer_index] = index_map + self.hit_count_buffer[self.buffer_index] = hit_count + self.total_count_buffer[self.buffer_index] = total_count + self.metrics_buffer[self.buffer_index] = metrics + self.min_height_buffer[self.buffer_index] = min_height + self.origin_buffer[self.buffer_index] = origin + + #release this buffer index + self.semaphores[self.buffer_index].release() + #print("wrote to buffer index: " + str(self.buffer_index)) + + self.last_buffer_index = self.buffer_index + + self.buffer_index += 1 + + if(self.buffer_index >= self.buffer_size): + self.buffer_index = 0 + #print(" buf rate = " + str(1.0 / (time.time() - buf_time))) + + # return pointcloud.copy_to_host() + # return index_map.copy_to_host() + + def combine_maps(self): + """ Combines all maps in the buffer and processes into 2D maps """ + #voxel_start_time = time.time() + if(self.origin_buffer[self.last_buffer_index] is None): + print("ERROR: No data in buffer") + return + + self.combined_origin = cuda.to_device(self.origin_buffer[self.last_buffer_index].copy_to_host()) + + combined_cell_count = np.zeros([1], dtype=np.int64) + self.combined_index_map = cuda.device_array([self.xy_size*self.xy_size*self.z_size], dtype=np.int32) + self.__init_1D_array[self.blocks,self.threads_per_block](self.combined_index_map,-1,self.xy_size*self.xy_size*self.z_size) + + + blockspergrid_xy = math.ceil(self.xy_size / self.threads_per_block_3D[0]) + blockspergrid_z = math.ceil(self.z_size / self.threads_per_block_3D[2]) + blockspergrid = (blockspergrid_xy, blockspergrid_xy, blockspergrid_z) + + # Combines the index maps and calculates the nessisary size for the combined map + + for i in range(0, self.buffer_size): + self.semaphores[i].acquire() + if(self.origin_buffer[i] is None): + self.semaphores[i].release() + continue + + + self.__combine_indices[blockspergrid, self.threads_per_block_3D]( + combined_cell_count, self.combined_index_map, self.combined_origin, self.index_buffer[i], self.voxel_count, self.origin_buffer[i], self.xy_size, self.z_size) + self.semaphores[i].release() + + if not (self.last_combined_origin is None): + #print("combine_old_indices") + #__combine_old_indices + self.__combine_old_indices[blockspergrid, self.threads_per_block_3D]( + combined_cell_count, self.combined_index_map, self.combined_origin, self.last_combined_index_map, self.voxel_count, self.last_combined_origin, self.xy_size, self.z_size) + + self.combined_cell_count_cpu = combined_cell_count[0] + # print(self.combined_cell_count_cpu) + + self.combined_hit_count = cuda.device_array([self.combined_cell_count_cpu], dtype=np.int32) + self.__init_1D_array[self.blocks,self.threads_per_block](self.combined_hit_count,0,self.combined_cell_count_cpu) + + self.combined_total_count = cuda.device_array([self.combined_cell_count_cpu], dtype=np.int32) + self.__init_1D_array[self.blocks,self.threads_per_block](self.combined_total_count,0,self.combined_cell_count_cpu) + + self.combined_metrics = cuda.device_array([self.combined_cell_count_cpu], dtype=np.float32) + self.__init_1D_array[self.blocks,self.threads_per_block](self.combined_metrics,0,self.combined_cell_count_cpu*len(self.metrics)) + + self.combined_min_height = cuda.device_array([self.combined_cell_count_cpu], dtype=np.float32) + self.__init_1D_array[self.blocks,self.threads_per_block](self.combined_min_height,1,self.combined_cell_count_cpu) + + + # Combines the data in the buffer + for i in range(0, self.buffer_size): + + self.semaphores[i].acquire() + + if(self.origin_buffer[i] is None): + self.semaphores[i].release() + continue + + + self.__combine_metrics[blockspergrid, self.threads_per_block_3D](self.combined_metrics, self.combined_hit_count,self.combined_total_count,self.combined_min_height, self.combined_index_map, self.combined_origin, self.metrics_buffer[ + i], self.hit_count_buffer[i],self.total_count_buffer[i], self.min_height_buffer[i],self.index_buffer[i], self.origin_buffer[i], self.voxel_count, self.metrics, self.xy_size, self.z_size, len(self.metrics)) + + self.semaphores[i].release() + + # fill unknown cells with data from the last combined map + if not (self.last_combined_origin is None): + #__combine_old_metrics + self.__combine_metrics[blockspergrid, self.threads_per_block_3D](self.combined_metrics, self.combined_hit_count,self.combined_total_count,self.combined_min_height, self.combined_index_map, self.combined_origin, self.last_combined_metrics, + self.last_combined_hit_count,self.last_combined_total_count,self.last_combined_min_height, self.last_combined_index_map, self.last_combined_origin, self.voxel_count, self.metrics, self.xy_size, self.z_size, len(self.metrics)) + + # set the last combined map + + self.last_combined_cell_count_cpu = self.combined_cell_count_cpu + self.last_combined_hit_count = self.combined_hit_count + self.last_combined_total_count = self.combined_total_count + self.last_combined_index_map = self.combined_index_map + self.last_combined_metrics = self.combined_metrics + self.last_combined_min_height = self.combined_min_height + self.last_combined_origin = self.combined_origin + + #print("voxel map rate = " + str(1.0 / (time.time() - voxel_start_time))) + #map_start_time = time.time() + + # Make 2d maps from combined map + + # Make height map from minimum height in lowest cell + self.height_map = cuda.device_array([self.xy_size,self.xy_size]) + self.__init_2D_array[blockspergrid, self.threads_per_block_2D](self.height_map,-1000.0,self.xy_size,self.xy_size) + + self.inferred_height_map = cuda.device_array([self.xy_size,self.xy_size]) + self.__init_2D_array[blockspergrid, self.threads_per_block_2D](self.inferred_height_map,-1000.0,self.xy_size,self.xy_size) + + self.ego_semaphore.acquire() + self.__make_height_map[blockspergrid, self.threads_per_block_2D]( + self.combined_origin, self.combined_index_map, self.combined_min_height, self.xy_size, self.z_size, self.xy_resolution, self.z_resolution,self.ego_position,self.robot_radius,self.ground_to_lidar_height, self.height_map) + self.ego_semaphore.release() + + self.__make_inferred_height_map[blockspergrid, self.threads_per_block_2D]( + self.combined_origin, self.combined_index_map, self.xy_size, self.z_size, self.z_resolution, self.inferred_height_map) + # Estimate ground slope + + self.roughness_map = cuda.device_array([self.xy_size,self.xy_size]) + self.__init_2D_array[blockspergrid, self.threads_per_block_2D](self.roughness_map,-1.0,self.xy_size,self.xy_size) + + self.x_slope_map = cuda.device_array([self.xy_size,self.xy_size]) + self.__init_2D_array[blockspergrid, self.threads_per_block_2D](self.x_slope_map,0.0,self.xy_size,self.xy_size) + + self.y_slope_map = cuda.device_array([self.xy_size,self.xy_size]) + self.__init_2D_array[blockspergrid, self.threads_per_block_2D](self.y_slope_map,0.0,self.xy_size,self.xy_size) + + self.__calculate_slope[blockspergrid, self.threads_per_block_2D]( + self.height_map, self.xy_size, self.xy_resolution, self.x_slope_map, self.y_slope_map, self.roughness_map) + + # Guess what the height is in unobserved cells + self.guessed_height_delta = cuda.device_array([self.xy_size,self.xy_size]) + self.__init_2D_array[blockspergrid, self.threads_per_block_2D](self.guessed_height_delta,0.0,self.xy_size,self.xy_size) + + self.__guess_height[blockspergrid, self.threads_per_block_2D]( + self.height_map,self.inferred_height_map,self.xy_size,self.xy_resolution,self.x_slope_map,self.y_slope_map,self.guessed_height_delta) + + + # Check for positive obstacles. Any cell where the max height is more than "threshold" above the height map and less than "threshold + robot height" is marked as an obstacle + # Obstacle type can be determined from cell metrics + positive_obstacle_map = cuda.device_array([self.xy_size,self.xy_size],dtype=np.int32) + self.__init_2D_array[blockspergrid, self.threads_per_block_2D](positive_obstacle_map,0,self.xy_size,self.xy_size) + + self.__make_positive_obstacle_map[blockspergrid, self.threads_per_block_2D]( + self.combined_index_map, self.height_map, self.xy_size, self.z_size, self.z_resolution, self.positive_obstacle_threshold,self.combined_hit_count,self.combined_total_count, self.robot_height, self.combined_origin,self.x_slope_map,self.y_slope_map,self.slope_obsacle_threshold, positive_obstacle_map) + + + # Check for negative obstacles. + negative_obstacle_map = cuda.device_array([self.xy_size,self.xy_size],dtype=np.int32) + self.__init_2D_array[blockspergrid, self.threads_per_block_2D](negative_obstacle_map,0,self.xy_size,self.xy_size) + + self.__make_negative_obstacle_map[blockspergrid, self.threads_per_block_2D](self.guessed_height_delta,negative_obstacle_map,self.negative_obstacle_threshold,self.xy_size) + + # make ground visability map + visability_map = cuda.device_array([self.xy_size,self.xy_size],dtype=np.int32) + + self.__make_visability_map[blockspergrid, self.threads_per_block_2D](visability_map,self.height_map,self.xy_size) + + + # format output data + + combined_origin_world = self.combined_origin.copy_to_host() + combined_origin_world[0] = combined_origin_world[0] * self.xy_resolution + combined_origin_world[1] = combined_origin_world[1] * self.xy_resolution + combined_origin_world[2] = combined_origin_world[2] * self.z_resolution + + #print("2d map rate = " + str(1.0 / (time.time() - map_start_time))) + + # return all maps as cpu arrays + return (combined_origin_world, positive_obstacle_map.copy_to_host(),negative_obstacle_map.copy_to_host(),self.roughness_map.copy_to_host(),visability_map.copy_to_host() ) + + def make_debug_voxel_map(self): + if(self.combined_cell_count_cpu is None): + print("No data") + return None + blockspergrid_xy = math.ceil( + self.xy_size / self.threads_per_block_3D[0]) + blockspergrid_z = math.ceil(self.z_size / self.threads_per_block_3D[2]) + blockspergrid = (blockspergrid_xy, blockspergrid_xy, blockspergrid_z) + + output_voxel_map = np.zeros( + [self.combined_cell_count_cpu, 5], np.float32) + + self.__make_voxel_pointcloud[blockspergrid, self.threads_per_block_3D]( + self.combined_index_map,self.combined_hit_count,self.combined_total_count, self.combined_origin, output_voxel_map, self.xy_size, self.z_size, self.xy_resolution, self.z_resolution) + + return output_voxel_map + + def make_debug_height_map(self): + if(self.height_map is None): + print("No data") + return None + + output_height_map_voxel = np.zeros( + [self.xy_size*self.xy_size, 7], np.float32) + + blockspergrid_xy = math.ceil( + self.xy_size / self.threads_per_block_2D[0]) + blockspergrid = (blockspergrid_xy, blockspergrid_xy) + self.__make_height_map_pointcloud[blockspergrid, self.threads_per_block_2D]( + self.height_map,self.roughness_map,self.x_slope_map,self.y_slope_map, self.combined_origin, output_height_map_voxel, self.xy_size, self.xy_resolution,self.z_resolution) + + return output_height_map_voxel + + def make_debug_inferred_height_map(self): + if(self.height_map is None): + print("No data") + return None + + output_height_map_voxel = np.zeros( + [self.xy_size*self.xy_size, 3], np.float32) + + blockspergrid_xy = math.ceil( + self.xy_size / self.threads_per_block_2D[0]) + blockspergrid = (blockspergrid_xy, blockspergrid_xy) + self.__make_infered_height_map_pointcloud[blockspergrid, self.threads_per_block_2D]( + self.guessed_height_delta, self.combined_origin, output_height_map_voxel, self.xy_size, self.xy_resolution,self.z_resolution) + + return output_height_map_voxel + + @cuda.jit + def __make_visability_map(visability,height_map,xy_size): + x, y = cuda.grid(2) + if(x >= xy_size or y >= xy_size): + return + if(height_map[x,y] > - 1000): + visability[x,y] = 1.0 + else: + visability[x,y] = 0.0 + + + @cuda.jit + def __make_height_map_pointcloud(height_map,roughness,x_slope,y_slope, origin, output_voxel_map, xy_size, xy_resolution,z_resolution): + x, y = cuda.grid(2) + if(x >= xy_size or y >= xy_size): + return + index = x + y*xy_size + if(index >= 0): + output_voxel_map[index, 0] = (x + origin[0]) * xy_resolution + output_voxel_map[index, 1] = (y + origin[1]) * xy_resolution + output_voxel_map[index, 2] = height_map[x, y] - z_resolution + output_voxel_map[index, 3] = roughness[x,y] + output_voxel_map[index, 4] = x_slope[x,y] + output_voxel_map[index, 5] = y_slope[x,y] + output_voxel_map[index, 6] = math.sqrt(x_slope[x,y] * x_slope[x,y] + y_slope[x,y]*y_slope[x,y]) + + @cuda.jit + def __make_infered_height_map_pointcloud(height_map, origin, output_voxel_map, xy_size, xy_resolution,z_resolution): + x, y = cuda.grid(2) + if(x >= xy_size or y >= xy_size): + return + index = x + y*xy_size + if(index >= 0): + output_voxel_map[index, 0] = (x + origin[0]) * xy_resolution + output_voxel_map[index, 1] = (y + origin[1]) * xy_resolution + output_voxel_map[index, 2] = height_map[x, y] - z_resolution + + @cuda.jit + def __make_voxel_pointcloud(combined_index_map, combined_hit_count,combined_total_count, origin, output_voxel_map, xy_size, z_size, xy_resolution, z_resolution): + x, y, z = cuda.grid(3) + if(x >= xy_size or y >= xy_size or z > z_size): + return + + index = int(combined_index_map[int( + x + y * xy_size + z * xy_size * xy_size)]) + if(index >= 0): + output_voxel_map[index, 0] = (x + origin[0]) * xy_resolution + output_voxel_map[index, 1] = (y + origin[1]) * xy_resolution + output_voxel_map[index, 2] = (z + origin[2]) * z_resolution + output_voxel_map[index, 3] = float(combined_hit_count[index]) / float(combined_total_count[index]) + output_voxel_map[index, 4] = combined_hit_count[index] + + @cuda.jit + def __make_negative_obstacle_map(guessed_height_delta,negative_obstacle_map,negative_obstacle_threshold,xy_size): + x, y = cuda.grid(2) + if(x >= xy_size or y >= xy_size): + return + + if(guessed_height_delta[x,y] > negative_obstacle_threshold): + negative_obstacle_map[x,y] = 100 + + + @cuda.jit + def __make_positive_obstacle_map(combined_index_map, height_map, xy_size, z_size, z_resolution, positive_obstacle_threshold,hit_count,total_count, robot_height, origin,x_slope,y_slope,slope_threshold, obstacle_map): + """ + Obstacle map reports the average density of occpied voxels within the obstacle range + """ + x, y = cuda.grid(2) + if(x >= xy_size or y >= xy_size): + return + + if(math.sqrt(x_slope[x,y] * x_slope[x,y] + y_slope[x,y] * y_slope[x,y]) >= slope_threshold): + obstacle_map[x,y] = 100 + return + + + min_obs_height = height_map[x,y] + positive_obstacle_threshold + max_obs_height = height_map[x,y] + robot_height + + min_height_index = int(math.floor((min_obs_height/z_resolution) - origin[2])) + 1 + max_height_index = int(math.floor((max_obs_height/z_resolution) - origin[2])) + + if not (min_height_index >= 0 and min_height_index < z_size): + return + + if not (max_height_index >= 0 and max_height_index < z_size): + return + + density = 0.0 + n = 0.0 + for z in range(min_height_index,max_height_index+1): + index = int(combined_index_map[int(x + y * xy_size + z * xy_size * xy_size)]) + + if(index >= 0): + if(hit_count[index] > 10): + n += float(total_count[index]) + density += float(hit_count[index]) + + + if(n>0.0): + density /= n + + obstacle_map[x, y] = int(density * 100) + + + + + @cuda.jit + def __make_height_map(combined_origin, combined_index_map, min_height, xy_size, z_size,xy_resolution, z_resolution,ego_position,radius,ground_to_lidar_height, output_height_map): + x, y = cuda.grid(2) + if(x >= xy_size or y >= xy_size): + return + + xp = (((combined_origin[0] + x) * xy_resolution) - ego_position[0]) + yp = (((combined_origin[1] + y) * xy_resolution) - ego_position[1]) + + if(xp*xp + yp*yp <= radius*radius): + output_height_map[x, y] = ego_position[2] - ground_to_lidar_height + + for z in range(z_size): + index = combined_index_map[int(x + y * xy_size + z * xy_size * xy_size)] + if(index >= 0): + output_height_map[x, y] = ( min_height[index] + z + combined_origin[2]) * z_resolution + return + + + @cuda.jit + def __make_inferred_height_map(combined_origin, combined_index_map, xy_size, z_size, z_resolution, output_inferred_height_map): + x, y = cuda.grid(2) + if(x >= xy_size or y >= xy_size): + return + + for z in range(z_size): + index = combined_index_map[int(x + y * xy_size + z * xy_size * xy_size)] + if(index < -1): + inferred_height = (z + combined_origin[2]) * z_resolution + output_inferred_height_map[x, y] = inferred_height + return + + @cuda.jit + def __guess_height(height_map,inferred_height_map,xy_size,xy_resolution,slope_map_x,slope_map_y,output_guessed_height_delta): + x0, y0 = cuda.grid(2) + if(x0 >= xy_size or y0 >= xy_size): + return + if( height_map[x0,y0] > -1000 ): + return + if(inferred_height_map[x0,y0] == -1000.0): + return + + x_p_done = False + x_n_done = False + y_p_done = False + y_n_done = False + + + x_p = x0 + x_ph = -1000 + x_n = x0 + x_nh = -1000 + y_p = y0 + y_ph = -1000 + y_n = y0 + y_nh = -1000 + + + i = 0 + while (i < 15 ) and (not (x_n_done and x_n_done and y_p_done and y_n_done)): + + x_p += 1 + x_n -= 1 + y_p += 1 + y_n -= 1 + + i += 1 + + if not x_p_done: + if(x_p < xy_size): + + for dy in range(-i,i): + if(y0 + dy >= xy_size or y0+dy <0): + continue + + if( height_map[x_p,y0 + dy] > -1000 ): + + x_ph = height_map[x_p,y0 + dy] + x_p_done = True + break + else: + x_p_done = True + + if not x_n_done: + if(x_n >= 0): + + for dy in range(-i + 1 ,i + 1): + if(y0 + dy >= xy_size or y0+dy <0): + continue + + if( height_map[x_n,y0 + dy] > -1000 ): + + x_nh = height_map[x_n,y0 + dy] + x_n_done = True + break + else: + x_n_done = True + + if not y_p_done: + if(y_p < xy_size): + + for dx in range(-i+1,i+1): + if(x0 + dx >= xy_size or x0+dx <0): + continue + + if( height_map[x0+dx,y_p] > -1000 ): + + y_ph = height_map[x0+dx,y_p] + y_p_done = True + break + else: + y_p_done = True + + if not y_n_done: + if(y_n >= 0): + + for dx in range(-i ,i ): + if(x0 + dx >= xy_size or x0+dx <0): + continue + + if( height_map[x0 + dx,y_n] > -1000 ): + + y_nh = height_map[x0 + dx,y_n] + y_n_done = True + break + else: + y_n_done = True + + + min_h = 1000.0 + max_h = inferred_height_map[x0,y0] + + if(x_ph > -1000): + min_h = min(x_ph,min_h) + max_h = max(x_ph,max_h) + + if(x_nh > -1000): + min_h = min(x_nh,min_h) + max_h = max(x_nh,max_h) + + if(y_ph > -1000): + min_h = min(y_ph,min_h) + max_h = max(y_ph,max_h) + + if(x_nh > -1000): + min_h = min(y_nh,min_h) + max_h = max(y_nh,max_h) + + + + dh = max_h - min_h + + if(dh > 0): + output_guessed_height_delta[x0,y0] = dh + + + + @cuda.jit + def __calculate_slope(height_map,xy_size,xy_resolution,output_slope_map_x,output_slope_map_y, output_roughness_map): + x0, y0 = cuda.grid(2) + if(x0 >= xy_size or y0 >= xy_size): + return + + n_good_pts = 0 + + radius = 1 + for x in range(max(0,x0 - radius), min(xy_size, x0 + radius + 1)): + for y in range(max(0,y0 - radius), min(xy_size, y0 + radius + 1)): + if( height_map[x,y] > -1000 ): + n_good_pts += 1 + + if(n_good_pts <3): + return + + pts = numba.cuda.local.array((3,9),np.float64) + + i=0 + mean_x = 0.0 + mean_y = 0.0 + mean_z = 0.0 + for x in range(max(0,x0 - radius), min(xy_size, x0 + radius + 1)): + for y in range(max(0,y0 - radius), min(xy_size, y0 + radius + 1)): + if( height_map[x,y] > -1000 ): + pts[0,i] = x * xy_resolution + pts[1,i] = y * xy_resolution + pts[2,i] = height_map[x,y] + + mean_x += pts[0,i] + mean_y += pts[1,i] + mean_z += pts[2,i] + + i+=1 + + mean_x /= float(i) + mean_y /= float(i) + mean_z /= float(i) + + xx=0.0 + xy=0.0 + xz=0.0 + yy=0.0 + yz=0.0 + for i in range(0,n_good_pts): + xx += (pts[0,i] - mean_x)*(pts[0,i] - mean_x) + xy += (pts[0,i] - mean_x)*(pts[1,i] - mean_y) + xz += (pts[0,i] - mean_x)*(pts[2,i] - mean_z) + yy += (pts[1,i] - mean_y)*(pts[1,i] - mean_y) + yz += (pts[1,i] - mean_y)*(pts[2,i] - mean_z) + + det = xx*yy - xy*xy + if(det == 0.0): + return + + a0 = (yy*xz - xy*yz) / det + a1 = (xx*yz - xy*xz) / det + + error = 0.0 + + # A*x + B*y + C*z = D + # n = [A,B,C] + # z = a0 * x + a1 * y + # 0 = a0 * x + a1 * y - z + # D = 0, A = a0, B = a1, C = -1 + # n = [-a0,-a1,1] + # theta_ = atan2(1,-a0) + + m = math.sqrt(a0*a0 + a1*a1 + 1) + a0/=m + a1/=m + + for i in range(0,n_good_pts): + e = (pts[2,i] - mean_z) - (a0 * (pts[0,i] - mean_x) + a1 * (pts[1,i] - mean_y)) + error += e*e + + error /= float(n_good_pts) + if(error >0): + error = math.log(error) + output_roughness_map[x0,y0] = error + + x_angle = math.atan2(a0,1.0/m) + y_angle = math.atan2(a1,1.0/m) + + output_slope_map_x[x0,y0] = x_angle + output_slope_map_y[x0,y0] = y_angle + + pass + + @cuda.jit + def __expand_binary(input_img, output_img, xy_size, r): + """ + Assumes that both the input and output images contain only 0s and 1s + """ + x, y = cuda.grid(2) + if(x >= xy_size or y >= xy_size): + return + + tmp_val = 0.0 + tmp_count = 0.0 + + r_int = int(math.floor(r)) + + for i in range(-r_int, r_int + 1): + dy = int(math.floor(math.sqrt(r*r - i*i))) + if(x + i < 0 or x+i >= xy_size): + continue + for j in range(-dy, dy + 1): + if(y + j < 0 or y+j >= xy_size): + continue + + if(input_img[x+i, y+j] == 1): + output_img[x, y] = 1 + return + + output_img[x, y] = 0 + + @cuda.jit + def __lowpass_binary(input_img, output_img, xy_size, filter_size, filter_fraction): + """ + Assumes that both the input and output images contain only 0s and 1s + """ + x, y = cuda.grid(2) + if(x >= xy_size or y >= xy_size): + return + + tmp_val = 0.0 + tmp_count = 0.0 + + if(input_img[x, y] == 0): + output_img[x, y] = 0 + return + + for i in range(-filter_size, filter_size + 1): + if(x + i < 0 or x+i >= xy_size): + continue + for j in range(-filter_size, filter_size + 1): + if(y + j < 0 or y+j >= xy_size): + continue + tmp_val += input_img[x+i, y+j]*1.0 + tmp_count += 1.0 + + if((tmp_val/tmp_count) >= filter_fraction): + output_img[x, y] = 1 + else: + output_img[x, y] = 0 + + @cuda.jit + def __convolve(input_img, output_img, kernel, xy_size, kernel_size): + x, y = cuda.grid(2) + if(x >= xy_size or y >= xy_size): + return + + r = (kernel_size-1)/2 + + tmp_val = 0.0 + + for i in range(-r, r+1): + x2 = i + r + if(x + r < 0 or x+r >= xy_size): + continue + for j in range(-r, r+1): + y2 = j + r + if(y + r < 0 or y+r >= xy_size): + continue + tmp_val += input_img[x+r, y+r] * kernel[x2, y2] + + output_img[x, y] = tmp_val + + + @cuda.jit + def __combine_metrics(combined_metrics, combined_hit_count,combined_total_count,combined_min_height, combined_index_map, combined_origin, old_metrics, old_hit_count,old_total_count,old_min_height, old_index_map, old_origin, voxel_count, metrics_list, xy_size, z_size, num_metrics): + x, y, z = cuda.grid(3) + + if(x >= xy_size or y >= xy_size or z >= z_size): + return + + dx = combined_origin[0] - old_origin[0] + dy = combined_origin[1] - old_origin[1] + dz = combined_origin[2] - old_origin[2] + + if((x + dx) >= xy_size or (y + dy) >= xy_size or (z + dz) >= z_size or (x+dx) < 0 or (y+dy) < 0 or (z+dz) < 0): + return + + index = combined_index_map[int( + x + y * xy_size + z * xy_size * xy_size)] + index_old = old_index_map[int( + (x + dx) + (y + dy) * xy_size + (z + dz) * xy_size * xy_size)] + + if(index < 0 or index_old < 0): + return + + combined_hit_count[index] = combined_hit_count[index] + old_hit_count[index_old] + combined_total_count[index] = combined_total_count[index] + old_total_count[index_old] + combined_min_height[index] = min(combined_min_height[index],old_min_height[index_old]) + + @cuda.jit + def __combine_old_metrics(combined_metrics, combined_hit_count,combined_total_count,combined_min_height, combined_index_map, combined_origin, old_metrics, old_hit_count,old_total_count,old_min_height, old_index_map, old_origin, voxel_count, metrics_list, xy_size, z_size, num_metrics): + x, y, z = cuda.grid(3) + + if(x >= xy_size or y >= xy_size or z >= z_size): + return + + dx = combined_origin[0] - old_origin[0] + dy = combined_origin[1] - old_origin[1] + dz = combined_origin[2] - old_origin[2] + + if((x + dx) >= xy_size or (y + dy) >= xy_size or (z + dz) >= z_size or (x+dx) < 0 or (y+dy) < 0 or (z+dz) < 0): + return + + index = combined_index_map[int( + x + y * xy_size + z * xy_size * xy_size)] + index_old = old_index_map[int( + (x + dx) + (y + dy) * xy_size + (z + dz) * xy_size * xy_size)] + + if(index < 0 or index_old < 0): + return + + combined_hit_count[index] = combined_hit_count[index] + old_hit_count[index_old] + combined_total_count[index] = combined_total_count[index] + old_total_count[index_old] + combined_min_height[index] = min(combined_min_height[index],old_min_height[index_old]) + + + @cuda.jit + def __combine_indices(combined_cell_count, combined_index_map, combined_origin, old_index_map, voxel_count, old_origin, xy_size, z_size): + x, y, z = cuda.grid(3) + + if(x >= xy_size or y >= xy_size or z >= z_size): + #print("bad index") + return + + dx = combined_origin[0] - old_origin[0] + dy = combined_origin[1] - old_origin[1] + dz = combined_origin[2] - old_origin[2] + + if((x + dx) >= xy_size or (y + dy) >= xy_size or (z + dz) >= z_size or (x+dx) < 0 or (y+dy) < 0 or (z+dz) < 0): + # print("oob") + return + + index = int(x + y * xy_size + z * xy_size * xy_size) + index_old = int((x + dx) + (y + dy) * xy_size + + (z + dz) * xy_size * xy_size) + + # If there is no data or empty data in the combined map and an occpuied voexl in the new map + if(old_index_map[index_old] >= 0 and combined_index_map[index] <= -1): + combined_index_map[index] = cuda.atomic.add(combined_cell_count, 0, 1) + + # if there is an empty cell in the old map and no data or empty data in the new map + elif(old_index_map[index_old] < -1 and combined_index_map[index] <= -1): + combined_index_map[index] += old_index_map[index_old] + 1 + + @cuda.jit + def __combine_old_indices(combined_cell_count, combined_index_map, combined_origin, old_index_map, voxel_count, old_origin, xy_size, z_size): + x, y, z = cuda.grid(3) + + if(x >= xy_size or y >= xy_size or z >= z_size): + #print("bad index") + return + + dx = combined_origin[0] - old_origin[0] + dy = combined_origin[1] - old_origin[1] + dz = combined_origin[2] - old_origin[2] + + if((x + dx) >= xy_size or (y + dy) >= xy_size or (z + dz) >= z_size or (x+dx) < 0 or (y+dy) < 0 or (z+dz) < 0): + # print("oob") + return + + index = int(x + y * xy_size + z * xy_size * xy_size) + index_old = int((x + dx) + (y + dy) * xy_size + + (z + dz) * xy_size * xy_size) + + # If there is no data in the combined map and an occpuied voexl in the new map + if((old_index_map[index_old]) >= 0 and (combined_index_map[index] <= -1) and (combined_index_map[index] >= -11)): + combined_index_map[index] = cuda.atomic.add(combined_cell_count, 0, 1) + + # if there is an empty cell in the old map and no data or empty data in the new map + elif(old_index_map[index_old] < -1 and combined_index_map[index] <= -1): + combined_index_map[index] += old_index_map[index_old] + 1 + + @cuda.jit + def __combine_2_maps(map1, map2): + pass + + def __calculate_metrics_master(self, pointcloud, point_count, count, index_map, cell_count_cpu, origin): + # print("mean") + + mean = cuda.device_array([cell_count_cpu*3], dtype=np.float32) + self.__init_1D_array[self.blocks,self.threads_per_block](mean,0,cell_count_cpu*3) + + #print("min height") + min_height = cuda.device_array([cell_count_cpu*3], dtype=np.float32) + self.__init_1D_array[self.blocks,self.threads_per_block](min_height,1,cell_count_cpu*3) + + + #print("other metrics") + metrics = cuda.device_array([cell_count_cpu*len(self.metrics)], dtype=np.float32) + self.__init_1D_array[self.blocks,self.threads_per_block](metrics,0,cell_count_cpu*len(self.metrics)) + + #print("calc blocks") + calculate_blocks = ( + int(np.ceil(point_count/self.threads_per_block)), 3) + #print("calc mean") + + #self.__calculate_mean[calculate_blocks, self.threads_per_block]( + # self.xy_resolution, self.z_resolution, self.xy_size, self.z_size, self.min_distance, index_map, pointcloud, mean, point_count, origin) + + # print("norm") + normalize_blocks = ( + int(np.ceil(cell_count_cpu/self.threads_per_block)), 3) + + # self.__normalize_mean[normalize_blocks,self.threads_per_block](mean,count) + # print("other") + + self.__calculate_metrics[calculate_blocks, self.threads_per_block]( + self.xy_resolution, self.z_resolution, self.xy_size, self.z_size, self.min_distance, index_map, pointcloud, mean, metrics, min_height, self.metrics, len(self.metrics), count, point_count, origin) + + # print("return") + + return metrics, min_height + + @cuda.jit + def __transform_pointcloud(points, transform, point_count): + i = cuda.grid(1) + if(i < point_count): + pt = numba.cuda.local.array(3, "f8") + pt[0] = points[i, 0] * transform[0, 0] + points[i, 1] * \ + transform[0, 1] + points[i, 2] * \ + transform[0, 2] + transform[0, 3] + pt[1] = points[i, 0] * transform[1, 0] + points[i, 1] * \ + transform[1, 1] + points[i, 2] * \ + transform[1, 2] + transform[1, 3] + pt[2] = points[i, 0] * transform[2, 0] + points[i, 1] * \ + transform[2, 1] + points[i, 2] * \ + transform[2, 2] + transform[2, 3] + + points[i, 0] = pt[0] + points[i, 1] = pt[1] + points[i, 2] = pt[2] + + @cuda.jit + def __point_2_map(xy_resolution, z_resolution, xy_size, z_size, min_distance, points, hit_count, total_count, point_count, ego_position, origin): + i = cuda.grid(1) + if(i < point_count): + + d2 = points[i, 0]*points[i, 0] + points[i, 1] * \ + points[i, 1] + points[i, 2]*points[i, 2] + + if(d2 < min_distance*min_distance): + return + + oob = False + + x_index = math.floor((points[i, 0] / xy_resolution) - origin[0]) + if(x_index < 0 or x_index >= xy_size): + oob = True + + y_index = math.floor((points[i, 1] / xy_resolution) - origin[1]) + if(y_index < 0 or y_index >= xy_size): + oob = True + + z_index = math.floor((points[i, 2] / z_resolution) - origin[2]) + if(z_index < 0 or z_index >= z_size): + oob = True + + if not oob: + # get the index of the hit + index = x_index + y_index*xy_size + z_index*xy_size*xy_size + + # update the hit count for the index + cuda.atomic.add(hit_count, index, 1) + cuda.atomic.add(total_count, index, 1) + # Trace the ray + + pt = numba.cuda.local.array(3, numba.float32) + end = numba.cuda.local.array(3, numba.float32) + slope = numba.cuda.local.array(3, numba.float32) + + pt[0] = ego_position[0] / xy_resolution + pt[1] = ego_position[1] / xy_resolution + pt[2] = ego_position[2] / z_resolution + + end[0] = points[i, 0] / xy_resolution + end[1] = points[i, 1] / xy_resolution + end[2] = points[i, 2] / z_resolution + + slope[0] = end[0] - pt[0] + slope[1] = end[1] - pt[1] + slope[2] = end[2] - pt[2] + + ray_length = math.sqrt( + slope[0]*slope[0] + slope[1]*slope[1] + slope[2]*slope[2]) + + slope[0] = slope[0] / ray_length + slope[1] = slope[1] / ray_length + slope[2] = slope[2] / ray_length + + slope_max = max(abs(slope[0]), max(abs(slope[1]), abs(slope[2]))) + + slope_index = 0 + + if(slope_max == abs(slope[1])): + slope_index = 1 + if(slope_max == abs(slope[2])): + slope_index = 2 + + length = 0 + direction = slope[slope_index]/abs(slope[slope_index]) + while (length < ray_length - 1): + pt[slope_index] += direction + pt[(slope_index + 1) % 3] += slope[(slope_index + 1) % + 3] / abs(slope[slope_index]) + pt[(slope_index + 2) % 3] += slope[(slope_index + 2) % + 3] / abs(slope[slope_index]) + + x_index = math.floor(pt[0] - origin[0]) + if(x_index < 0 or x_index >= xy_size): + return + + y_index = math.floor(pt[1] - origin[1]) + if(y_index < 0 or y_index >= xy_size): + return + + z_index = math.floor(pt[2] - origin[2]) + if(z_index < 0 or z_index >= z_size): + return + + index = x_index + y_index*xy_size + z_index*xy_size*xy_size + + cuda.atomic.add(total_count, index, 1) + + length += abs(1.0/slope[slope_index]) + + @cuda.jit + def __assign_indices(hit_count, miss_count, index_map, cell_count, voxel_count): + i = cuda.grid(1) + if(i < voxel_count): + if(hit_count[i] > 0): + index_map[i] = cuda.atomic.add(cell_count, 0, 1) + else: + index_map[i] = - miss_count[i] - 1 + + @cuda.jit + def __move_data(old, new, index_map, voxel_count): + i = cuda.grid(1) + if(i < voxel_count): + if(index_map[i] >= 0): + new[index_map[i]] = old[i] + + @cuda.jit + def __calculate_mean(xy_resolution, z_resolution, xy_size, z_size, min_distance, index_map, points, mean, point_count, origin): + i = cuda.grid(1) + if(i < point_count): + + d2 = points[i, 0]*points[i, 0] + points[i, 1] * \ + points[i, 1] + points[i, 2]*points[i, 2] + + if(d2 < min_distance*min_distance): + return + + x_index = math.floor((points[i, 0]/xy_resolution) - origin[0]) + if(x_index < 0 or x_index >= xy_size): + return + + y_index = math.floor((points[i, 1]/xy_resolution) - origin[1]) + if(y_index < 0 or y_index >= xy_size): + return + + z_index = math.floor((points[i, 2]/z_resolution) - origin[2]) + if(z_index < 0 or z_index >= z_size): + return + + local_point = cuda.local.array(shape=3, dtype=numba.float64) + + local_point[0] = points[i, 0] - x_index * xy_resolution + local_point[1] = points[i, 1] - y_index * xy_resolution + local_point[2] = points[i, 2] - z_index * z_resolution + + index = index_map[int( + x_index + y_index*xy_size + z_index*xy_size*xy_size)] + + cuda.atomic.add(mean, 3*index, local_point[0]) + cuda.atomic.add(mean, 1+3*index, local_point[1]) + cuda.atomic.add(mean, 2+3*index, local_point[2]) + + @cuda.jit + def __normalize_mean(mean, count): + i, j = cuda.grid(2) + + mean[j + 3*i] = mean[j + 3*i]/count[i] + + @cuda.jit + def __calculate_metrics(xy_resolution, z_resolution, xy_size, z_size, min_distance, index_map, points, mean, output_metrics, min_height, metrics_list, num_metrics, count, point_count, origin): + i = cuda.grid(1) + if(i < point_count): + + d2 = points[i, 0]*points[i, 0] + points[i, 1] * \ + points[i, 1] + points[i, 2]*points[i, 2] + + if(d2 < min_distance*min_distance): + return + + x_index = math.floor((points[i, 0]/xy_resolution) - origin[0]) + if(x_index < 0 or x_index >= xy_size): + return + + y_index = math.floor((points[i, 1]/xy_resolution) - origin[1]) + if(y_index < 0 or y_index >= xy_size): + return + + z_index = math.floor((points[i, 2]/z_resolution) - origin[2]) + if(z_index < 0 or z_index >= z_size): + return + + local_point = cuda.local.array(shape=3, dtype=numba.float64) + + local_point[0] = (points[i, 0]/xy_resolution) - x_index - origin[0] + local_point[1] = (points[i, 1]/xy_resolution) - y_index - origin[1] + local_point[2] = (points[i, 2]/z_resolution) - z_index - origin[2] + index = index_map[int( + x_index + y_index*xy_size + z_index*xy_size*xy_size)] + + for j in range(num_metrics): + if (metrics_list[j, 1] > 0): + cuda.atomic.add(output_metrics, j + index*num_metrics, + (local_point[metrics_list[j, 0]]-mean[metrics_list[j, 0] + 3*index])**metrics_list[j, 1]) + + cuda.atomic.min(min_height, index, local_point[2]) + + @cuda.jit + def __init_1D_array(array,value,length): + i = cuda.grid(1) + if(i>=length): + return + array[i] = value + + @cuda.jit + def __init_2D_array(array,value,width,height): + x,y = cuda.grid(2) + if(x>=width or y>=height): + return + array[x,y] = value diff --git a/scripts/gvom_ros.py b/scripts/gvom_ros.py new file mode 100644 index 0000000..996919d --- /dev/null +++ b/scripts/gvom_ros.py @@ -0,0 +1,228 @@ +import rospy +import numpy as np +import gvom +import sensor_msgs.point_cloud2 as pc2 +from nav_msgs.msg import Odometry, OccupancyGrid +from sensor_msgs.msg import PointCloud2 +import tf2_ros +import tf +import ros_numpy +import time + +import matplotlib.pyplot as plt + +voxel_mapper = None +listener = None +transformer = None +odom_data = None +tfBuffer = None +lidar_debug_pub = None +odom_frame = "/camera_init" +new_data = False + +def odom_callback(data): + global odom_data + odom_data = (data.pose.pose.position.x,data.pose.pose.position.y,data.pose.pose.position.z) + #print("got odom") + +def lidar_callback(data): + global new_data + print("got scan") + + + + if odom_data == None: + print("no odom") + return + + if not voxel_mapper is None: + #try: + scan_time = time.time() + lidar_frame = data.header.frame_id + #listener.waitForTransform(odom_frame,lidar_frame, data.header.stamp,rospy.Duration(0.01)) + trans = tfBuffer.lookup_transform(odom_frame,lidar_frame, data.header.stamp,rospy.Duration(0.01)) + #print(trans) + + + translation = np.zeros([3]) + translation[0] = trans.transform.translation.x + translation[1] = trans.transform.translation.y + translation[2] = trans.transform.translation.z + + rotation = np.zeros([4]) + rotation[0] = trans.transform.rotation.x + rotation[1] = trans.transform.rotation.y + rotation[2] = trans.transform.rotation.z + rotation[3] = trans.transform.rotation.w + + tf_matrix = transformer.fromTranslationRotation(translation,rotation) + #print(tf_matrix) + #print("x offset = " + str(tf_matrix[0,3])) + + pc = ros_numpy.point_cloud2.pointcloud2_to_xyz_array(data) + #scan_center = (trans.transform.translation.x,trans.transform.translation.y,trans.transform.translation.z) + voxel_mapper.Process_pointcloud(pc,odom_data,tf_matrix) + new_data = True + + #transformed_pc = voxel_mapper.Process_pointcloud(pc,scan_center,tf_matrix).astype(np.float32) + #print(transformed_pc.dtype) + + #transformed_pc = np.core.records.fromarrays([transformed_pc[:,0],transformed_pc[:,1],transformed_pc[:,2]],names='x,y,z') + #print(transformed_pc) + + #debug_msg = ros_numpy.point_cloud2.array_to_pointcloud2(transformed_pc,data.header.stamp,odom_frame) + #lidar_debug_pub.publish(debug_msg) + print(" pointcloud rate = " + str(1.0 / (time.time() - scan_time))) + #print("processed pc") + #except: + # print("bad tf") + pass + +def main(): + global voxel_mapper, listener, transformer, tfBuffer, lidar_debug_pub, new_data + + printed = False + + rospy.init_node('voxel_mapping', anonymous=True) + print("start") + + tfBuffer = tf2_ros.Buffer() + listener = tf2_ros.TransformListener(tfBuffer) + transformer = tf.TransformerROS() + + xy_resolution = 0.40 + z_resolution = 0.2 + width = 256 + height = 64 + buffer_size = 4 + min_point_distance = 1.0 + positive_obstacle_threshold = 0.50 + negative_obstacle_threshold = 0.5 + density_threshold = 50 + slope_obsacle_threshold = 0.3 + min_roughness = -10 + max_roughness = 0 + robot_height = 2.0 + robot_radius = 4.0 + ground_to_lidar_height = 1.0 + + voxel_mapper = gvom.Gvom(xy_resolution,z_resolution,width,height,buffer_size,min_point_distance,positive_obstacle_threshold, + negative_obstacle_threshold,slope_obsacle_threshold,robot_height,robot_radius,ground_to_lidar_height) + + rospy.Subscriber("/velodyne_cloud_registered", PointCloud2, lidar_callback,queue_size=1) + rospy.Subscriber("/aft_mapped_to_init_high_frec", Odometry, odom_callback,queue_size=1) + + s_obstacle_map_pub = rospy.Publisher("/local_planning/map/soft_obstacle",OccupancyGrid,queue_size = 1) + p_obstacle_map_pub = rospy.Publisher("/local_planning/map/positive_obstacle",OccupancyGrid,queue_size = 1) + n_obstacle_map_pub = rospy.Publisher("/local_planning/map/negative_obstacle",OccupancyGrid,queue_size = 1) + h_obstacle_map_pub = rospy.Publisher("/local_planning/map/hard_obstacle",OccupancyGrid,queue_size = 1) + g_certainty_pub = rospy.Publisher("/local_planning/map/ground_certainty",OccupancyGrid,queue_size = 1) + a_certainty_pub = rospy.Publisher("/local_planning/map/all_ground_certainty",OccupancyGrid,queue_size = 1) + r_map_pub = rospy.Publisher("/local_planning/map/roughness",OccupancyGrid,queue_size = 1) + + lidar_debug_pub = rospy.Publisher('/local_planning/voxel/debug/lidar',PointCloud2,queue_size = 1) + voxel_debug_pub = rospy.Publisher('/local_planning/voxel/debug/voxel',PointCloud2,queue_size = 1) + voxel_hm_debug_pub = rospy.Publisher('/local_planning/voxel/debug/height_map',PointCloud2,queue_size = 1) + voxel_inf_hm_debug_pub = rospy.Publisher('/local_planning/voxel/debug/inferred_height_map',PointCloud2,queue_size = 1) + + + rate = rospy.Rate(10) + while not rospy.is_shutdown(): + if(not new_data): + if(not printed): + print("No new data") + printed = True + rate.sleep() + continue + new_data = False + printed = False + + print("combined maps") + map_time = time.time() + map_data = voxel_mapper.combine_maps() + + + if not map_data is None: + map_origin = map_data[0] + obs_map = map_data[1] + neg_map = map_data[2] + rough_map = map_data[3] + cert_map = map_data[4] + #print(obs_map) + #print(obs_map.shape) + #print(map_origin) + + out_map = OccupancyGrid() + out_map.header.stamp = rospy.Time.now() + out_map.header.frame_id = odom_frame + out_map.info.resolution = xy_resolution + out_map.info.width = width + out_map.info.height = width + out_map.info.origin.orientation.x = 0 + out_map.info.origin.orientation.y = 0 + out_map.info.origin.orientation.z = 0 + out_map.info.origin.orientation.w = 1 + out_map.info.origin.position.x = map_origin[0] + out_map.info.origin.position.y = map_origin[1] + out_map.info.origin.position.z = 0 + + out_map.data = np.reshape(np.maximum(100 * (obs_map > density_threshold),neg_map),-1,order='F').astype(np.int8) + + h_obstacle_map_pub.publish(out_map) + + out_map.data = np.reshape(100 * (obs_map <= density_threshold) * (obs_map > 0),-1,order='F').astype(np.int8) + + s_obstacle_map_pub.publish(out_map) + + out_map.data = np.reshape(cert_map*100,-1,order='F').astype(np.int8) + + g_certainty_pub.publish(out_map) + a_certainty_pub.publish(out_map) + + out_map.data = np.reshape(neg_map,-1,order='F').astype(np.int8) + + n_obstacle_map_pub.publish(out_map) + + rough_map = ((np.maximum(np.minimum(rough_map,max_roughness),min_roughness) + min_roughness) / (max_roughness - min_roughness)) * 100 + out_map.data = np.reshape(rough_map,-1,order='F').astype(np.int8) + + r_map_pub.publish(out_map) + + voxel_pc = voxel_mapper.make_debug_voxel_map() + if not (voxel_pc is None): + voxel_pc = np.core.records.fromarrays([voxel_pc[:,0],voxel_pc[:,1],voxel_pc[:,2],voxel_pc[:,3],voxel_pc[:,4]],names='x,y,z,solid factor,count') + + voxel_debug_pub.publish(ros_numpy.point_cloud2.array_to_pointcloud2(voxel_pc,rospy.Time.now(),odom_frame)) + + voxel_hm = voxel_mapper.make_debug_height_map() + if not (voxel_hm is None): + #plt.figure() + #plt.scatter(obs_map.flatten('F'),voxel_hm[:,6]) + #plt.xlabel("obstacle density") + #plt.ylabel("slope") + #plt.show() + + voxel_hm = np.core.records.fromarrays([voxel_hm[:,0],voxel_hm[:,1],voxel_hm[:,2],voxel_hm[:,3],voxel_hm[:,4],voxel_hm[:,5],voxel_hm[:,6],obs_map.flatten('F')],names='x,y,z,roughness,slope_x,slope_y,slope,obstacles') + + voxel_hm_debug_pub.publish(ros_numpy.point_cloud2.array_to_pointcloud2(voxel_hm,rospy.Time.now(),odom_frame)) + + voxel_inf_hm = voxel_mapper.make_debug_inferred_height_map() + if not (voxel_inf_hm is None): + voxel_inf_hm = np.core.records.fromarrays([voxel_inf_hm[:,0],voxel_inf_hm[:,1],voxel_inf_hm[:,2]],names='x,y,z') + + voxel_inf_hm_debug_pub.publish(ros_numpy.point_cloud2.array_to_pointcloud2(voxel_inf_hm,rospy.Time.now(),odom_frame)) + + print("map rate = " + str(1.0 / (time.time() - map_time))) + + else: + print("no map data") + + + + + rate.sleep() + print("end") + + +if __name__ == '__main__': + main()