diff --git a/.gitignore b/.gitignore
index 6926523a6..43bfd39e2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,6 @@
 build
+buildcuda
+compile
 
 # Dependency folders
 depends/*/
@@ -123,6 +125,11 @@ ClientBin/
 *.pfx
 *.publishsettings
 
+# Nsight Nvidia Eclipse
+.cproject
+.project
+.settings/
+
 # RIA/Silverlight projects
 Generated_Code/
 
diff --git a/CMakeLists.txt b/CMakeLists.txt
index d8ef04736..39bba8717 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,3 +1,4 @@
+#CMAKE_MINIMUM_REQUIRED(VERSION 3.28.3)
 CMAKE_MINIMUM_REQUIRED(VERSION 2.8.12.1)
 
 SET(PROJECT_VER_MAJOR 0)
@@ -45,7 +46,6 @@ OPTION(ENABLE_PROFILING "Collect profiling stats (memory consuming)" OFF)
 IF(ENABLE_PROFILING)
   SET(LIBFREENECT2_WITH_PROFILING 1)
 ENDIF()
-
 IF(MSVC)
   # suppress several "possible loss of data" warnings, and
   # "zero-length array in struct" from libusb.h
@@ -266,6 +266,7 @@ ENDIF()
 SET(HAVE_OpenGL disabled)
 IF(ENABLE_OPENGL)
   FIND_PACKAGE(GLFW3)
+  set(OpenGL_GL_PREFERENCE "GLVND")
   FIND_PACKAGE(OpenGL)
   SET(HAVE_OpenGL no)
   IF(GLFW3_FOUND AND OPENGL_FOUND)
@@ -277,7 +278,8 @@ IF(ENABLE_OPENGL)
     LIST(APPEND LIBFREENECT2_DLLS ${GLFW3_DLL})
     LIST(APPEND LIBRARIES
       ${GLFW3_LIBRARIES}
-      ${OPENGL_gl_LIBRARY}
+      ${OPENGL_GL_LIBRARY}
+      ${OPENGL_LIBRARIES}
     )
     LIST(APPEND SOURCES
       src/flextGL.cpp
@@ -337,19 +339,27 @@ IF(ENABLE_OPENCL)
   ENDIF(OpenCL_FOUND)
 ENDIF(ENABLE_OPENCL)
 
+
 SET(HAVE_CUDA disabled)
 IF(ENABLE_CUDA)
-  FIND_PACKAGE(CUDA)
+  INCLUDE(CheckLanguage)
+  CHECK_LANGUAGE(CUDA)
+#  FIND_PACKAGE(CUDA)
   SET(HAVE_CUDA no)
-  IF(CUDA_FOUND AND MSVC14 AND CUDA_VERSION VERSION_LESS 8.0)
+  #IF(CUDA_FOUND AND MSVC14 AND CUDA_VERSION VERSION_LESS 8.0)
+  IF(CMAKE_CUDA_COMPILER AND MSVC14 AND CUDA_VERSION VERSION_LESS 8.0)
     SET(HAVE_CUDA "no (VS2015 not supported)")
-  ELSEIF(CUDA_FOUND)
+  #ELSEIF(CUDA_FOUND)
+  ELSEIF(CMAKE_CUDA_COMPILER)
     SET(LIBFREENECT2_WITH_CUDA_SUPPORT 1)
     SET(HAVE_CUDA yes)
+    ENABLE_LANGUAGE(CUDA)
 
+    #FIND_PACKAGE(CUDAToolkit)
     STRING(REPLACE "\\" "/" NVCUDASAMPLES_ROOT "$ENV{NVCUDASAMPLES_ROOT}")
     STRING(REPLACE "\\" "/" NVCUDASAMPLES8_0_ROOT "$ENV{NVCUDASAMPLES8_0_ROOT}")
-    CUDA_INCLUDE_DIRECTORIES(
+    #CUDA_INCLUDE_DIRECTORIES(
+    INCLUDE_DIRECTORIES(
       "${MY_DIR}/include/"
       "${CUDA_TOOLKIT_ROOT_DIR}/samples/common/inc"
       "${NVCUDASAMPLES_ROOT}/common/inc"
@@ -357,7 +367,7 @@ IF(ENABLE_CUDA)
     )
     SET(CUDA_FLAGS -use_fast_math)
     IF(NOT MSVC)
-      SET(CUDA_FLAGS "${CUDA_FLAGS} -Xcompiler -fPIC")
+      SET(CUDA_FLAGS "${CUDA_FLAGS} -Xcompiler -fPIC -D_FORCE_INLINES")
     ENDIF()
     IF(HAVE_CXX11 STREQUAL yes AND CUDA_VERSION VERSION_GREATER 7.0)
       SET(CUDA_FLAGS "${CUDA_FLAGS} -std=c++11")
@@ -365,25 +375,42 @@ IF(ENABLE_CUDA)
 
     SET(OLD_CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
     STRING(REGEX REPLACE "-std=c\\+\\+.." "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
-    CUDA_COMPILE(CUDA_OBJECTS
+    # Thrust requires exceptions.  If OpenCL from NVidia is used we don't need this flag.
+    STRING(REGEX REPLACE "-fno-exceptions" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+    MESSAGE(STATUS "*************")
+    MESSAGE(STATUS ${CMAKE_CXX_FLAGS})
+    MESSAGE(STATUS ${CUDA_FLAGS})
+    MESSAGE(STATUS ${CUDA_INCLUDE_DIRS})
+    MESSAGE(STATUS ${CUDA_LIBRARIES})
+    MESSAGE(STATUS ${CUDA_OBJECTS})
+    MESSAGE(STATUS "*************")
+    #CUDA_COMPILE(CUDA_OBJECTS
+    LIST(APPEND SOURCES
+      #include/libfreenect2/cuda_registration.h
       src/cuda_depth_packet_processor.cu
       src/cuda_kde_depth_packet_processor.cu
-      OPTIONS ${CUDA_FLAGS}
+      src/cuda_registration.cu
+      #OPTIONS ${CUDA_FLAGS}
     )
     SET(CMAKE_CXX_FLAGS "${OLD_CMAKE_CXX_FLAGS}")
 
     INCLUDE_DIRECTORIES(${CUDA_INCLUDE_DIRS})
 
-    LIST(APPEND SOURCES
-      ${CUDA_OBJECTS}
-    )
+    #LIST(APPEND SOURCES
+    #  ${CUDA_OBJECTS}
+    #)
 
-    LIST(APPEND LIBRARIES
-      ${CUDA_LIBRARIES}
-    )
+    #LIST(APPEND LIBRARIES
+    #  ${CUDA_LIBRARIES}
+    #)
+    #set_target_properties(freenect2 PROPERTIES CUDA_ARCHITECTURES "35;50;75")
+    #SET_PROPERTY(TARGET freenect2 PROPERTY CUDA_ARCHITECTURES OFF)
   ENDIF()
+  #ENDIF(CMAKE_CUDA_COMPILER)
 ENDIF(ENABLE_CUDA)
 
+
+
 # RPATH handling for CUDA 8.0 libOpenCL.so conflict. See #804.
 IF(HAVE_OpenCL STREQUAL yes AND UNIX AND NOT APPLE)
   FILE(GLOB CUDA_ld_so_conf /etc/ld.so.conf.d/cuda*.conf)
diff --git a/doc/sequence.svg b/doc/sequence.svg
new file mode 100644
index 000000000..351c09b09
--- /dev/null
+++ b/doc/sequence.svg
@@ -0,0 +1,433 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Created with Inkscape (http://www.inkscape.org/) -->
+
+<svg
+   width="210mm"
+   height="297mm"
+   viewBox="0 0 210 297"
+   version="1.1"
+   id="svg5"
+   inkscape:version="1.2.2 (b0a8486541, 2022-12-01)"
+   sodipodi:docname="sequence.svg"
+   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+   xmlns="http://www.w3.org/2000/svg"
+   xmlns:svg="http://www.w3.org/2000/svg">
+  <sodipodi:namedview
+     id="namedview7"
+     pagecolor="#ffffff"
+     bordercolor="#000000"
+     borderopacity="0.25"
+     inkscape:showpageshadow="2"
+     inkscape:pageopacity="0.0"
+     inkscape:pagecheckerboard="0"
+     inkscape:deskcolor="#d1d1d1"
+     inkscape:document-units="mm"
+     showgrid="true"
+     inkscape:zoom="1.4734708"
+     inkscape:cx="375.98302"
+     inkscape:cy="586.03129"
+     inkscape:window-width="1920"
+     inkscape:window-height="1008"
+     inkscape:window-x="0"
+     inkscape:window-y="0"
+     inkscape:window-maximized="1"
+     inkscape:current-layer="layer1">
+    <inkscape:grid
+       type="xygrid"
+       id="grid119" />
+  </sodipodi:namedview>
+  <defs
+     id="defs2">
+    <marker
+       style="overflow:visible"
+       id="marker22339"
+       refX="0"
+       refY="0"
+       orient="auto-start-reverse"
+       inkscape:stockid="Arrow1L"
+       markerWidth="8.75"
+       markerHeight="5"
+       viewBox="0 0 8.75 5"
+       inkscape:isstock="true"
+       inkscape:collect="always"
+       preserveAspectRatio="xMidYMid">
+      <path
+         style="fill:context-stroke;fill-rule:evenodd;stroke:none"
+         d="M 0,0 5,-5 -12.5,0 5,5 Z"
+         id="path22337"
+         transform="scale(-0.5)" />
+    </marker>
+    <marker
+       style="overflow:visible"
+       id="Arrow1L"
+       refX="0"
+       refY="0"
+       orient="auto-start-reverse"
+       inkscape:stockid="Arrow1L"
+       markerWidth="7"
+       markerHeight="4"
+       viewBox="0 0 8.75 5"
+       inkscape:isstock="true"
+       inkscape:collect="always"
+       preserveAspectRatio="xMidYMid">
+      <path
+         style="fill:context-stroke;fill-rule:evenodd;stroke:none"
+         d="M 0,0 5,-5 -12.5,0 5,5 Z"
+         id="arrow1L"
+         transform="scale(-0.5)" />
+    </marker>
+    <rect
+       x="25"
+       y="105"
+       width="190"
+       height="120"
+       id="rect6525" />
+  </defs>
+  <g
+     inkscape:label="Capa 1"
+     inkscape:groupmode="layer"
+     id="layer1">
+    <text
+       xml:space="preserve"
+       style="font-size:3.175px;fill:#000000;stroke-width:0.365001;stroke-opacity:0.929412"
+       x="8.4666662"
+       y="82.020836"
+       id="text4956"><tspan
+         sodipodi:role="line"
+         id="tspan4954"
+         style="stroke-width:0.365"
+         x="8.4666662"
+         y="82.020836">int types = 0;</tspan><tspan
+         sodipodi:role="line"
+         style="stroke-width:0.365"
+         x="8.4666662"
+         y="85.989586"
+         id="tspan8818">libfreenect2::SyncMultiFrameListener listener</tspan><tspan
+         sodipodi:role="line"
+         style="stroke-width:0.365"
+         x="8.4666662"
+         y="89.958336"
+         id="tspan8820">libfreenect2::FrameMap frames;</tspan></text>
+    <text
+       xml:space="preserve"
+       style="font-size:3.175px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.665001;stroke-opacity:0.929412"
+       x="6.6145835"
+       y="38.364582"
+       id="text9694"><tspan
+         sodipodi:role="line"
+         id="tspan9692"
+         style="fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.665;stroke-opacity:0.929412"
+         x="6.6145835"
+         y="38.364582">  libfreenect2::Freenect2 freenect2;</tspan><tspan
+         sodipodi:role="line"
+         style="fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.665;stroke-opacity:0.929412"
+         x="6.6145835"
+         y="42.333332"
+         id="tspan9696">  libfreenect2::Freenect2Device *dev = 0;</tspan><tspan
+         sodipodi:role="line"
+         style="fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.665;stroke-opacity:0.929412"
+         x="6.6145835"
+         y="46.302082"
+         id="tspan9698">  libfreenect2::PacketPipeline *pipeline = 0;</tspan></text>
+    <rect
+       style="fill:none;stroke:#ffcc00;stroke-width:0.665001"
+       id="rect865"
+       width="76.729164"
+       height="112.44791"
+       x="2.6458333"
+       y="3.96875"
+       ry="1.3229179" />
+    <rect
+       style="fill:none;stroke:#2ca02c;stroke-width:0.665001"
+       id="rect6587"
+       width="70.114586"
+       height="85.989586"
+       x="6.614583"
+       y="27.78125"
+       ry="1.3229179" />
+    <text
+       xml:space="preserve"
+       style="font-size:3.175px;fill:#000000;stroke-width:0.264583"
+       x="3.6607749"
+       y="8.4984169"
+       id="text113"><tspan
+         sodipodi:role="line"
+         id="tspan111"
+         style="stroke-width:0.264583"
+         x="3.6607749"
+         y="8.4984169">Protonect.cpp</tspan></text>
+    <text
+       xml:space="preserve"
+       style="font-size:3.175px;fill:#000000;stroke-width:0.264583"
+       x="8.9905415"
+       y="31.71825"
+       id="text117"><tspan
+         sodipodi:role="line"
+         id="tspan115"
+         style="stroke-width:0.264583"
+         x="8.9905415"
+         y="31.71825">main</tspan></text>
+    <text
+       xml:space="preserve"
+       style="font-size:3.175px;fill:#000000;stroke-width:0.264583"
+       x="83.073875"
+       y="45.540085"
+       id="text264"><tspan
+         sodipodi:role="line"
+         id="tspan262"
+         style="stroke-width:0.264583"
+         x="83.073875"
+         y="45.540085">libfreenect2::CudaAccessPacketPipeline(deviceId)</tspan></text>
+    <path
+       style="fill:#000000;stroke:#000000;stroke-width:0.365;stroke-dasharray:none;stroke-opacity:1;marker-end:url(#Arrow1L)"
+       d="m 71.4375,44.979167 6.614583,0"
+       id="path320"
+       sodipodi:nodetypes="cc" />
+    <text
+       xml:space="preserve"
+       style="font-size:3.175px;fill:#000000;stroke:none;stroke-width:0.665001"
+       x="6.3447084"
+       y="16.980959"
+       id="text4045"><tspan
+         sodipodi:role="line"
+         id="tspan4043"
+         style="fill:#000000;stroke:none;stroke-width:0.665"
+         x="6.3447084"
+         y="16.980959">bool protonect_shutdown = false;</tspan><tspan
+         sodipodi:role="line"
+         style="fill:#000000;stroke:none;stroke-width:0.665"
+         x="6.3447084"
+         y="20.949709"
+         id="tspan4887">bool protonect_paused = false;</tspan><tspan
+         sodipodi:role="line"
+         style="fill:#000000;stroke:none;stroke-width:0.665"
+         x="6.3447084"
+         y="24.918459"
+         id="tspan5629">libfreenect2::Freenect2Device *devtopause;</tspan><tspan
+         sodipodi:role="line"
+         style="fill:#000000;stroke:none;stroke-width:0.665"
+         x="6.3447084"
+         y="28.887209"
+         id="tspan4889" /></text>
+    <text
+       xml:space="preserve"
+       transform="scale(0.26458333)"
+       id="text6523"
+       style="white-space:pre;shape-inside:url(#rect6525);display:inline;fill:#2ca02c;stroke:#2ca02c;stroke-width:2.51339" />
+    <path
+       style="fill:none;stroke:#ffcc00;stroke-width:0.665001;stroke-opacity:0.752941"
+       d="m 2.6458333,10.583333 72.7604167,0"
+       id="path6786"
+       sodipodi:nodetypes="cc" />
+    <path
+       style="fill:none;stroke:#2ca02c;stroke-width:0.665001;stroke-opacity:1"
+       d="m 6.6145832,33.072916 63.4999998,10e-7"
+       id="path7256"
+       sodipodi:nodetypes="cc" />
+    <text
+       xml:space="preserve"
+       style="font-size:3.175px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.365001;stroke-dasharray:none;stroke-opacity:0.929412"
+       x="6.6145835"
+       y="51.59375"
+       id="text13099"><tspan
+         sodipodi:role="line"
+         id="tspan13097"
+         style="stroke-width:0.365"
+         x="6.6145835"
+         y="51.59375">  std::string serial = &quot;&quot;;</tspan><tspan
+         sodipodi:role="line"
+         style="stroke-width:0.365"
+         x="6.6145835"
+         y="55.5625"
+         id="tspan13101" /><tspan
+         sodipodi:role="line"
+         style="stroke-width:0.365"
+         x="6.6145835"
+         y="59.53125"
+         id="tspan13103">  bool viewer_enabled = true;</tspan><tspan
+         sodipodi:role="line"
+         style="stroke-width:0.365"
+         x="6.6145835"
+         y="63.5"
+         id="tspan13105">  bool enable_rgb = true;</tspan><tspan
+         sodipodi:role="line"
+         style="stroke-width:0.365"
+         x="6.6145835"
+         y="67.46875"
+         id="tspan13107">  bool enable_depth = true;</tspan><tspan
+         sodipodi:role="line"
+         style="stroke-width:0.365"
+         x="6.6145835"
+         y="71.4375"
+         id="tspan13109">  int deviceId = -1;</tspan><tspan
+         sodipodi:role="line"
+         style="stroke-width:0.365"
+         x="6.6145835"
+         y="75.40625"
+         id="tspan13111">  size_t framemax = -1;</tspan></text>
+    <path
+       style="fill:#000000;stroke:#000000;stroke-width:0.365;stroke-dasharray:none;stroke-opacity:1;marker-end:url(#Arrow1L)"
+       d="M 41.010417,50.270833 H 78.052083"
+       id="path16211"
+       sodipodi:nodetypes="cc" />
+    <text
+       xml:space="preserve"
+       style="font-size:3.175px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.365001;stroke-dasharray:none;stroke-opacity:0.929412"
+       x="83.34375"
+       y="51.59375"
+       id="text16959"><tspan
+         sodipodi:role="line"
+         id="tspan16957"
+         style="stroke-width:0.365"
+         x="83.34375"
+         y="51.59375">freenect2.getDefaultDeviceSerialNumber();</tspan></text>
+    <path
+       style="fill:#000000;stroke:#000000;stroke-width:0.365;stroke-dasharray:none;stroke-opacity:1;marker-end:url(#Arrow1L)"
+       d="m 67.46875,41.010417 h 59.00208"
+       id="path21434"
+       sodipodi:nodetypes="cc" />
+    <text
+       xml:space="preserve"
+       style="font-size:3.175px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.365001;stroke-dasharray:none;stroke-opacity:0.929412"
+       x="128.32288"
+       y="42.333332"
+       id="text22182"><tspan
+         sodipodi:role="line"
+         id="tspan22180"
+         style="stroke-width:0.365"
+         x="128.32288"
+         y="42.333332">freenect2.openDevice(serial, pipeline);</tspan></text>
+    <path
+       style="fill:#000000;stroke:#000000;stroke-width:0.365;stroke-dasharray:none;stroke-opacity:1;marker-end:url(#Arrow1L)"
+       d="m 78.316669,84.666671 h 3.439583"
+       id="path22230"
+       sodipodi:nodetypes="cc" />
+    <path
+       style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.264583px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#marker22339)"
+       d="m 157.42708,44.979166 19.84375,10e-7 V 43.65625"
+       id="path22335"
+       inkscape:connector-type="polyline"
+       inkscape:connector-curvature="0"
+       sodipodi:nodetypes="ccc" />
+    <path
+       style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.264583px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#marker22339)"
+       d="m 148.16974,50.138578 17.45943,0.132255 V 43.65625"
+       id="path22419"
+       inkscape:connector-type="polyline"
+       inkscape:connector-curvature="0"
+       sodipodi:nodetypes="ccc" />
+    <path
+       style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.264583px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#marker22339)"
+       d="m 186.53125,41.010417 6.61458,0 V 23.8125 H 72.760417"
+       id="path3351"
+       inkscape:connector-type="polyline"
+       inkscape:connector-curvature="0"
+       sodipodi:nodetypes="cccc" />
+    <text
+       xml:space="preserve"
+       style="font-size:3.175px;fill:#000000;stroke-width:0.365001;stroke-opacity:0.929412"
+       x="83.07917"
+       y="82.020836"
+       id="text6510"><tspan
+         sodipodi:role="line"
+         id="tspan6508"
+         style="stroke-width:0.365"
+         x="83.07917"
+         y="82.020836">libfreenect2::Frame::Color | libfreenect2::Frame::Ir | libfreenect2::Frame::Depth </tspan></text>
+    <path
+       style="fill:#000000;stroke:#000000;stroke-width:0.365;stroke-dasharray:none;stroke-opacity:1;marker-end:url(#Arrow1L)"
+       d="M 27.78125,80.962501 H 78.052083"
+       id="path8012"
+       sodipodi:nodetypes="cc" />
+    <text
+       xml:space="preserve"
+       style="font-size:3.175px;fill:#000000;stroke-width:0.365001;stroke-opacity:0.929412"
+       x="84.666664"
+       y="85.724998"
+       id="text9630"><tspan
+         sodipodi:role="line"
+         id="tspan9628"
+         style="stroke-width:0.365"
+         x="84.666664"
+         y="85.724998">(   )</tspan></text>
+    <path
+       style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.264583px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#marker22339)"
+       d="M 141.55208,82.020833 V 83.34375 H 87.3125 v 1.322917"
+       id="path9632"
+       inkscape:connector-type="polyline"
+       inkscape:connector-curvature="0"
+       sodipodi:nodetypes="cccc" />
+    <text
+       xml:space="preserve"
+       style="font-size:3.175px;fill:#000000;stroke-width:0.365001;stroke-opacity:0.929412"
+       x="10.583333"
+       y="129.64583"
+       id="text10445"><tspan
+         sodipodi:role="line"
+         id="tspan10443"
+         style="stroke-width:0.365"
+         x="10.583333"
+         y="129.64583">dev</tspan></text>
+    <text
+       xml:space="preserve"
+       style="font-size:3.175px;fill:#000000;stroke-width:0.365001;stroke-opacity:0.929412"
+       x="21.166666"
+       y="140.22917"
+       id="text11199"><tspan
+         sodipodi:role="line"
+         id="tspan11197"
+         style="stroke-width:0.365"
+         x="21.166666"
+         y="140.22917">setColorFrameListener(&amp;listener)</tspan></text>
+    <text
+       xml:space="preserve"
+       style="font-size:3.175px;fill:#000000;stroke-width:0.365001;stroke-opacity:0.929412"
+       x="21.166666"
+       y="148.16667"
+       id="text11953"><tspan
+         sodipodi:role="line"
+         id="tspan11951"
+         style="stroke-width:0.365"
+         x="21.166666"
+         y="148.16667">setIrAndDepthFrameListener(&amp;listener);</tspan></text>
+    <path
+       style="fill:#000000;stroke-width:0.365001;stroke-opacity:1;stroke:#000000;stroke-dasharray:1.46000397,0.73000198;stroke-dashoffset:0"
+       d="m 13.229167,132.29166 v 76.72917"
+       id="path13574" />
+    <rect
+       style="fill:none;stroke:#000000;stroke-width:0.365001;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
+       id="rect13748"
+       width="10.583333"
+       height="7.9375"
+       x="7.9375"
+       y="124.35416"
+       ry="1.3229179" />
+    <path
+       style="fill:#000000;stroke:#000000;stroke-width:0.365;stroke-dasharray:none;stroke-opacity:1;marker-end:url(#Arrow1L)"
+       d="m 15.08125,139.7 h 3.439583"
+       id="path13817"
+       sodipodi:nodetypes="cc" />
+    <path
+       style="fill:#000000;stroke:#000000;stroke-width:0.365;stroke-dasharray:none;stroke-opacity:1;marker-end:url(#Arrow1L)"
+       d="m 15.08125,147.10832 h 3.439583"
+       id="path13819"
+       sodipodi:nodetypes="cc" />
+    <text
+       xml:space="preserve"
+       style="font-size:3.175px;fill:#000000;stroke:none;stroke-width:0.365001;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;fill-opacity:1"
+       x="21.166666"
+       y="156.10417"
+       id="text14694"><tspan
+         sodipodi:role="line"
+         id="tspan14692"
+         style="stroke-width:0.365;fill:#000000;fill-opacity:1;stroke:none"
+         x="21.166666"
+         y="156.10417">start()</tspan></text>
+    <path
+       style="fill:#000000;stroke:#000000;stroke-width:0.365;stroke-dasharray:none;stroke-opacity:1;marker-end:url(#Arrow1L)"
+       d="m 15.08125,154.51667 h 3.439583"
+       id="path14746"
+       sodipodi:nodetypes="cc" />
+  </g>
+</svg>
diff --git a/examples/Protonect.cpp b/examples/Protonect.cpp
index 2fbf4a67b..c34ec8c3a 100644
--- a/examples/Protonect.cpp
+++ b/examples/Protonect.cpp
@@ -101,9 +101,13 @@ class MyFileLogger: public libfreenect2::Logger
  * Main application entry point.
  *
  * Accepted argumemnts:
- * - cpu Perform depth processing with the CPU.
- * - gl  Perform depth processing with OpenGL.
- * - cl  Perform depth processing with OpenCL.
+ * - cpu        Perform depth processing with the CPU.
+ * - gl         Perform depth processing with OpenGL.
+ * - cl         Perform depth processing with OpenCL.
+ * - clkde
+ * - cuda
+ * - cudakde
+ * - cudaccess  Does not send data to CPU.
  * - <number> Serial number of the device to open.
  * - -noviewer Disable viewer window.
  */
@@ -113,7 +117,7 @@ int main(int argc, char *argv[])
   std::string program_path(argv[0]);
   std::cerr << "Version: " << LIBFREENECT2_VERSION << std::endl;
   std::cerr << "Environment variables: LOGFILE=<protonect.log>" << std::endl;
-  std::cerr << "Usage: " << program_path << " [-gpu=<id>] [gl | cl | clkde | cuda | cudakde | cpu] [<device serial>]" << std::endl;
+  std::cerr << "Usage: " << program_path << " [-gpu=<id>] [gl | cl | clkde | cuda | cudakde | cudaccess | cpu] [<device serial>]" << std::endl;
   std::cerr << "        [-noviewer] [-norgb | -nodepth] [-help] [-version]" << std::endl;
   std::cerr << "        [-frames <number of frames to process>]" << std::endl;
   std::cerr << "To pause and unpause: pkill -USR1 Protonect" << std::endl;
@@ -225,6 +229,15 @@ int main(int argc, char *argv[])
         pipeline = new libfreenect2::CudaKdePacketPipeline(deviceId);
 #else
       std::cout << "CUDA pipeline is not supported!" << std::endl;
+#endif
+    }
+    else if(arg == "cudaccess")
+    {
+#ifdef LIBFREENECT2_WITH_CUDA_SUPPORT
+      if(!pipeline)
+        pipeline = new libfreenect2::CudaAccessPacketPipeline(deviceId);
+#else
+      std::cout << "CUDA pipeline is not supported!" << std::endl;
 #endif
     }
     else if(arg.find_first_not_of("0123456789") == std::string::npos) //check if parameter could be a serial number
diff --git a/include/libfreenect2/cuda_registration.h b/include/libfreenect2/cuda_registration.h
new file mode 100644
index 000000000..76c995e0d
--- /dev/null
+++ b/include/libfreenect2/cuda_registration.h
@@ -0,0 +1,162 @@
+/*
+ * This file is part of the OpenKinect Project. http://www.openkinect.org
+ *
+ * Copyright (c) 2014 individual OpenKinect contributors. See the CONTRIB file
+ * for details.
+ *
+ * This code is licensed to you under the terms of the Apache License, version
+ * 2.0, or, at your option, the terms of the GNU General Public License,
+ * version 2.0. See the APACHE20 and GPL2 files for the text of the licenses,
+ * or the following URLs:
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.gnu.org/licenses/gpl-2.0.txt
+ *
+ * If you redistribute this file in source form, modified or unmodified, you
+ * may:
+ *   1) Leave this header intact and distribute it under the same terms,
+ *      accompanying it with the APACHE20 and GPL20 files, or
+ *   2) Delete the Apache 2.0 clause and accompany it with the GPL2 file, or
+ *   3) Delete the GPL v2 clause and accompany it with the APACHE20 file
+ * In all cases you must keep the copyright notice intact and include a copy
+ * of the CONTRIB file.
+ *
+ * Binary distributions must follow the binary distribution requirements of
+ * either License.
+ */
+
+/** @file cuda_registration.h Class for merging depth and color frames using cuda. */
+
+#ifndef CUDA_REGISTRATION_H_
+#define CUDA_REGISTRATION_H_
+
+#include <string>
+#include <libfreenect2/config.h>
+#include <libfreenect2/libfreenect2.hpp>
+#include <libfreenect2/frame_listener.hpp>
+
+#ifdef LIBFREENECT2_WITH_CUDA_SUPPORT
+#include <thrust/host_vector.h>
+#include <thrust/device_vector.h>
+#include <thrust/copy.h>
+#include <thrust/device_ptr.h>
+#include <thrust/sequence.h>
+
+namespace libfreenect2
+{
+
+typedef thrust::tuple<float, float, float, float> TupleXYZRGB;
+
+/**
+ * Frame whose data is allocated on device.
+ */
+class LIBFREENECT2_API CudaDeviceFrame: public Frame
+{
+public:
+	/** Construct a new frame.
+	 * @param width Width in pixel
+	 * @param height Height in pixel
+	 * @param bytes_per_pixel Bytes per pixel
+	 */
+	CudaDeviceFrame(size_t width, size_t height, size_t bytes_per_pixel);
+	virtual ~CudaDeviceFrame();
+private:
+	bool allocateMemory();
+};
+
+class CudaRegistrationImpl;
+
+/** @defgroup registration Registration and Geometry
+ * Register depth to color, create point clouds. */
+
+/** Combine frames of depth and color camera using gpus. @ingroup registration
+ * Right now this class uses a reverse engineered formula that uses factory
+ * preset extrinsic parameters the same way the Registration class does.
+ */
+class LIBFREENECT2_API CudaRegistration
+{
+public:
+  /**
+   * @param depth_p Depth camera parameters. You can use the factory values, or use your own.
+   * @param rgb_p Color camera parameters. Probably use the factory values for now.
+   */
+	CudaRegistration(Freenect2Device::IrCameraParams depth_p, Freenect2Device::ColorCameraParams rgb_p);
+  ~CudaRegistration();
+
+  /** Undistort and register a single depth point to color camera.
+   * @param dx Distorted depth coordinate x (pixel)
+   * @param dy Distorted depth coordinate y (pixel)
+   * @param dz Depth value (millimeter)
+   * @param[out] cx Undistorted color coordinate x (normalized)
+   * @param[out] cy Undistorted color coordinate y (normalized)
+   */
+  void apply(int dx, int dy, float dz, float& cx, float &cy) const;
+
+  /** Map color images onto depth images
+   * @param rgb Color image (1920x1080 BGRX)
+   * @param depth Depth image (512x424 float)
+   * @param[out] undistorted Undistorted depth image
+   * @param[out] registered Color image for the depth image (512x424)
+   * @param enable_filter Filter out pixels not visible to both cameras.
+   * @param[out] bigdepth If not `NULL`, return mapping of depth onto colors (1920x1082 float). **1082** not 1080, with a blank top and bottom row.
+   * @param[out] color_depth_map Index of mapped color pixel for each depth pixel (512x424).
+   */
+  bool apply(const Frame* rgb, const Frame* depth, CudaDeviceFrame* undistorted, CudaDeviceFrame* registered, const bool enable_filter = true, CudaDeviceFrame* bigdepth = 0, int* color_depth_map = 0) const;
+
+  /** Undistort depth
+   * @param depth Depth image (512x424 float)
+   * @param[out] undistorted Undistorted depth image
+   */
+  void undistortDepth(const Frame* depth, Frame* undistorted) const;
+
+  /** Construct a 3-D point with color in a point cloud.
+   * @param undistorted Undistorted depth frame from apply().
+   * @param registered Registered color frame from apply().
+   * @param r Row (y) index in depth image.
+   * @param c Column (x) index in depth image.
+   * @param[out] x X coordinate of the 3-D point (meter).
+   * @param[out] y Y coordinate of the 3-D point (meter).
+   * @param[out] z Z coordinate of the 3-D point (meter).
+   * @param[out] rgb Color of the 3-D point (BGRX). To unpack the data, use
+   *
+   *     const uint8_t *p = reinterpret_cast<uint8_t*>(&rgb);
+   *     uint8_t b = p[0];
+   *     uint8_t g = p[1];
+   *     uint8_t r = p[2];
+   */
+  void getPointXYZRGB (const Frame* undistorted, const Frame* registered, int r, int c, float& x, float& y, float& z, float& rgb) const;
+
+  /** Construct a 3-D point in a point cloud.
+   * @param undistorted Undistorted depth frame from apply().
+   * @param r Row (y) index in depth image.
+   * @param c Column (x) index in depth image.
+   * @param[out] x X coordinate of the 3-D point (meter).
+   * @param[out] y Y coordinate of the 3-D point (meter).
+   * @param[out] z Z coordinate of the 3-D point (meter).
+   */
+  void getPointXYZ (const Frame* undistorted, int r, int c, float& x, float& y, float& z) const;
+
+  /**
+   * Construct a point cloud as thrust vector of XYZRGB data as tuples of <float, float, float, float> in device memory, which can be used
+   * for further processing with CUDA.
+   * @param undistorted Undistorted depth frame from apply().
+   * @param registered Registered color frame from apply().
+   * @param[out] cloud_data <X, Y, Z, RGB> coordinates of the 3-D point (meter) and color (BGRX).
+   *     To unpack the color data, use
+   *     const uint8_t *p = reinterpret_cast<uint8_t*>(&rgb);
+   *     uint8_t b = p[0];
+   *     uint8_t g = p[1];
+   *     uint8_t r = p[2];
+   */
+  void getPointXYZRGB(const Frame* undistorted, const Frame* registered, thrust::device_vector<TupleXYZRGB>& cloud_data) const;
+
+private:
+  CudaRegistrationImpl *impl_;
+
+  /* Disable copy and assignment constructors */
+  CudaRegistration(const CudaRegistration&);
+  CudaRegistration& operator=(const CudaRegistration&);
+};
+#endif // LIBFREENECT2_WITH_CUDA_SUPPORT
+
+} /* namespace libfreenect2 */
+#endif /* REGISTRATION_H_ */
diff --git a/src/cuda_registration.cu b/src/cuda_registration.cu
new file mode 100644
index 000000000..e0c8872ee
--- /dev/null
+++ b/src/cuda_registration.cu
@@ -0,0 +1,558 @@
+/*
+ * This file is part of the OpenKinect Project. http://www.openkinect.org
+ *
+ * Copyright (c) 2014 individual OpenKinect contributors. See the CONTRIB file
+ * for details.
+ *
+ * This code is licensed to you under the terms of the Apache License, version
+ * 2.0, or, at your option, the terms of the GNU General Public License,
+ * version 2.0. See the APACHE20 and GPL2 files for the text of the licenses,
+ * or the following URLs:
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.gnu.org/licenses/gpl-2.0.txt
+ *
+ * If you redistribute this file in source form, modified or unmodified, you
+ * may:
+ *   1) Leave this header intact and distribute it under the same terms,
+ *      accompanying it with the APACHE20 and GPL20 files, or
+ *   2) Delete the Apache 2.0 clause and accompany it with the GPL2 file, or
+ *   3) Delete the GPL v2 clause and accompany it with the APACHE20 file
+ * In all cases you must keep the copyright notice intact and include a copy
+ * of the CONTRIB file.
+ *
+ * Binary distributions must follow the binary distribution requirements of
+ * either License.
+ */
+
+/** @file Implementation of merging depth and color images using cuda. */
+
+#include <libfreenect2/cuda_registration.h>
+#include "libfreenect2/logging.h"
+#include <limits>
+
+#define MONO_ROWS 424
+#define MONO_COLS 512
+#define COLOR_ROWS 1080
+#define COLOR_COLS 1920
+
+
+typedef unsigned char uchar;
+
+#define CHECK_CUDA(expr) do { cudaError_t err = (expr); if (err != cudaSuccess) { LOG_ERROR << #expr ": " << cudaGetErrorString(err); return false; } } while(0)
+#define CALL_CUDA(expr) do { cudaError_t err = (expr); if (err != cudaSuccess) { LOG_ERROR << #expr ": " << cudaGetErrorString(err); } } while(0)
+
+static __device__
+void distort(int mx, int my, float& d_x, float& d_y, const libfreenect2::Freenect2Device::IrCameraParams& d_depth)
+{
+  float dx = ((float)mx - d_depth.cx) / d_depth.fx;
+  float dy = ((float)my - d_depth.cy) / d_depth.fy;
+  float dx2 = dx * dx;
+  float dy2 = dy * dy;
+  float r2 = dx2 + dy2;
+  float dxdy2 = 2 * dx * dy;
+  float kr = 1 + ((d_depth.k3 * r2 + d_depth.k2) * r2 + d_depth.k1) * r2;
+  d_x = d_depth.fx * (dx * kr + d_depth.p2 * (r2 + 2 * dx2) + d_depth.p1 * dxdy2) + d_depth.cx;
+  d_y = d_depth.fy * (dy * kr + d_depth.p1 * (r2 + 2 * dy2) + d_depth.p2 * dxdy2) + d_depth.cy;
+}
+
+static __device__
+void depth_to_color(float mx, float my, float& d_rx, float& d_ry,
+		const libfreenect2::Freenect2Device::IrCameraParams& d_depth,
+		const libfreenect2::Freenect2Device::ColorCameraParams& d_color,
+		const float depth_q, const float color_q)
+{
+  mx = (mx - d_depth.cx) * depth_q;
+  my = (my - d_depth.cy) * depth_q;
+
+  float wx =
+    (mx * mx * mx * d_color.mx_x3y0) + (my * my * my * d_color.mx_x0y3) +
+    (mx * mx * my * d_color.mx_x2y1) + (my * my * mx * d_color.mx_x1y2) +
+    (mx * mx * d_color.mx_x2y0) + (my * my * d_color.mx_x0y2) + (mx * my * d_color.mx_x1y1) +
+    (mx * d_color.mx_x1y0) + (my * d_color.mx_x0y1) + (d_color.mx_x0y0);
+
+  float wy =
+    (mx * mx * mx * d_color.my_x3y0) + (my * my * my * d_color.my_x0y3) +
+    (mx * mx * my * d_color.my_x2y1) + (my * my * mx * d_color.my_x1y2) +
+    (mx * mx * d_color.my_x2y0) + (my * my * d_color.my_x0y2) + (mx * my * d_color.my_x1y1) +
+    (mx * d_color.my_x1y0) + (my * d_color.my_x0y1) + (d_color.my_x0y0);
+
+    d_rx = (wx / (d_color.fx * color_q)) - (d_color.shift_m / d_color.shift_d);
+    d_ry = (wy / color_q) + d_color.cy;
+}
+
+static __global__
+void dInitMaps(int* d_map_dist, float* d_map_x, float* d_map_y, int* d_map_yi,
+		const libfreenect2::Freenect2Device::IrCameraParams d_depth,
+		const libfreenect2::Freenect2Device::ColorCameraParams d_color,
+		const float depth_q, const float color_q)
+{
+  // Configuration copied from cuda_depth_packet_processor.cu
+  const uint i = blockIdx.x*blockDim.x + threadIdx.x;
+
+  const uint x = i % MONO_COLS;
+  const uint y = i / MONO_COLS;
+
+  float mx, my;
+  int ix, iy, index;
+  float rx, ry;
+
+  // compute the distorted coordinate for current pixel
+  distort(x, y, mx, my, d_depth);
+
+  // rounding the values and check if the pixel is inside the image
+  ix = (int)(mx + 0.5f);
+  iy = (int)(my + 0.5f);
+  if(ix < 0 || ix >= 512 || iy < 0 || iy >= 424)
+		index = -1;
+  else
+  // computing the index from the coordinates for faster access to the data
+  index = iy * 512 + ix;
+  d_map_dist[i] = index;
+
+  // compute the depth to color mapping entries for the current pixel
+  depth_to_color(x, y, rx, ry, d_depth, d_color, depth_q, color_q);
+  d_map_x[i] = rx;
+  d_map_y[i] = ry;
+  // compute the y offset to minimize later computations
+  d_map_yi[i] = (int)(ry + 0.5f);
+}
+
+static __global__
+void setFloat(float* devPtr, float value)
+{
+  // Configuration copied from cuda_depth_packet_processor.cu
+  const uint i = blockIdx.x * blockDim.x + threadIdx.x;
+
+  devPtr[i] = value;
+}
+
+/**
+ * Set all values of array of floats devPtr to value.
+ * This function does not call for synchronization.
+ * @param devPtr pointer to memory in device
+ * @param value value to set
+ * @param size number of float sized elements in array
+ */
+void cudaMemsetFloat(float* devPtr, float value, size_t size)
+{
+  size_t numThreads = 512;
+  size_t numBlocks = size / numThreads;
+  setFloat<<<numBlocks, numThreads>>>(devPtr, value);
+}
+
+/**
+ * Compares value at address with val, if val is smaller it
+ * saves it at address.
+ */
+__device__ float atomicKeepSmaller(float* address, float val)
+{
+  // Implementation addapted from http://docs.nvidia.com/cuda/cuda-c-programming-guide/#atomic-functions
+  int* address_as_ull = (int*)address;
+  int old = *address_as_ull, assumed;
+  do {
+    assumed = old;
+    old = atomicCAS(address_as_ull, assumed,
+                    __float_as_int(val < __int_as_float(assumed) ? val : __int_as_float(assumed)));
+    // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN)
+  } while (assumed != old);
+
+  return __int_as_float(old);
+}
+
+
+static __global__
+void undistort(int* d_depth_to_c_off,
+  float* d_undistorted_data,
+  float* d_filter_map,
+  const float* d_depth_data, const int* d_map_dist,
+  const float* d_map_x, const int* d_map_yi,
+  const libfreenect2::Freenect2Device::IrCameraParams depth,
+  const libfreenect2::Freenect2Device::ColorCameraParams color,
+  const int filter_width_half,
+  const int filter_height_half,
+  const int offset_filter_map,
+  const bool enable_filter)
+{
+  // getting index of distorted depth pixel
+  const int i = blockIdx.x * blockDim.x + threadIdx.x;
+  const int index = d_map_dist[i];
+
+  // check if distorted depth pixel is outside of the depth image
+  if(index < 0){
+    d_depth_to_c_off[i] = -1;
+    d_undistorted_data[i] = 0;
+    return;
+  }
+
+  // getting depth value for current pixel
+  const float z = d_depth_data[index];
+  d_undistorted_data[i] = z;
+
+  // checking for invalid depth value
+  if(z <= 0.0f){
+    d_depth_to_c_off[i] = -1;
+    return;
+  }
+
+  // calculating x offset for rgb image based on depth value
+  const float color_cx = color.cx + 0.5f; // 0.5f added for later rounding
+  const float rx = (d_map_x[index] + (color.shift_m / z)) * color.fx + color_cx;
+  const int cx = rx; // same as round for positive numbers (0.5f was already added to color_cx)
+  // getting y offset for depth image
+  const int cy = d_map_yi[i];
+  // combining offsets
+  const int c_off = cx + cy * COLOR_COLS;
+
+  // check if c_off is outside of rgb image
+  // checking rx/cx is not needed because the color image is much wider then the depth image
+  if(c_off < 0 || c_off >= COLOR_ROWS * COLOR_COLS){
+     d_depth_to_c_off[i] = -1;
+    return;
+  }
+
+  // saving the offset for later
+  d_depth_to_c_off[i] = c_off;
+
+  // I am not sure if there won't be race conditions here due to overlap, the atomic operation should help.
+  if(enable_filter){
+    // setting a window around the filter map pixel corresponding to the color pixel with the current z value
+    int yi = (cy - filter_height_half) * 1920 + cx - filter_width_half; // index of first pixel to set
+    for(int r = -filter_height_half; r <= filter_height_half; ++r, yi += COLOR_COLS) // index increased by a full row each iteration
+    {
+      float *it = d_filter_map + offset_filter_map + yi;
+      for(int c = -filter_width_half; c <= filter_width_half; ++c, ++it)
+      {
+        // only set if the current z is smaller
+        atomicKeepSmaller(it, z);
+      }
+    }
+  }
+}
+
+/** Construct 'registered' image with filter.
+ *  Filter drops duplicate pixels due to aspect of two cameras.
+ */
+static __global__
+void registerImageFiltered(unsigned int *d_registered_data,
+    const unsigned int * d_rgb_data,
+    const int* d_depth_to_c_off,
+    const float* d_undistorted_data,
+    const float *d_p_filter_map,
+    const float filter_tolerance)
+{
+  // getting index of distorted depth pixel
+  const int i = blockIdx.x * blockDim.x + threadIdx.x;
+
+  // run through all registered color pixels and set them based on filter results
+  const int c_off = d_depth_to_c_off[i];
+
+  // check if offset is out of image
+  if(c_off < 0){
+    d_registered_data[i] = 0;
+    return;
+  }
+
+  const float min_z = d_p_filter_map[c_off];
+  const float z = d_undistorted_data[i];
+
+  // check for allowed depth noise
+  d_registered_data[i] = (z - min_z) / z > filter_tolerance ? 0 : d_rgb_data[c_off];
+
+}
+
+/** Construct 'registered' image. */
+static __global__
+void registerImage(unsigned int *d_registered_data,
+    const unsigned int * d_rgb_data,
+    const int* d_depth_to_c_off)
+{
+  // getting index of distorted depth pixel
+  const int i = blockIdx.x * blockDim.x + threadIdx.x;
+
+  // run through all registered color pixels and set them based on c_off
+  const int c_off = d_depth_to_c_off[i];
+
+  // check if offset is out of image
+  d_registered_data[i] = c_off < 0 ? 0 : d_rgb_data[c_off];
+}
+
+namespace libfreenect2
+{
+
+CudaDeviceFrame::CudaDeviceFrame(size_t width, size_t height, size_t bytes_per_pixel):
+	Frame(width, height, bytes_per_pixel, (unsigned char*)-1)
+{
+  allocateMemory();
+}
+
+CudaDeviceFrame::~CudaDeviceFrame()
+{
+  CALL_CUDA(cudaFree(data));
+}
+
+bool CudaDeviceFrame::allocateMemory()
+{
+  CHECK_CUDA(cudaMalloc(&data, width * height * bytes_per_pixel * sizeof(unsigned char)));
+
+  cudaDeviceSynchronize();
+
+  CHECK_CUDA(cudaGetLastError());
+  return true;
+}
+
+/*
+ * The information used here has been taken from libfreenect2::Registration source
+ * code.
+ */
+static const float depth_q = 0.01;
+static const float color_q = 0.002199;
+
+class CudaRegistrationImpl
+{
+public:
+  CudaRegistrationImpl(Freenect2Device::IrCameraParams depth_p, Freenect2Device::ColorCameraParams rgb_p):
+    depth(depth_p), color(rgb_p),
+    filter_width_half(2), filter_height_half(1), filter_tolerance(0.01f),
+    block_size(128), grid_size(MONO_IMAGE_SIZE/block_size)
+  {
+    good = setupDevice();
+    if (!good)
+      return;
+
+    good = initMaps();
+    if (!good)
+      return;
+  }
+
+  ~CudaRegistrationImpl()
+  {
+    if (good)
+      freeDeviceMemory();
+  }
+
+  void apply(int dx, int dy, float dz, float& cx, float &cy) const;
+  bool apply(const Frame* rgb, const Frame* depth, CudaDeviceFrame* undistorted, CudaDeviceFrame* registered, const bool enable_filter, CudaDeviceFrame* bigdepth, int* color_depth_map) const;
+  void undistortDepth(const Frame *depth, Frame *undistorted) const;
+  void getPointXYZRGB (const Frame* undistorted, const Frame* registered, int r, int c, float& x, float& y, float& z, float& rgb) const;
+  void getPointXYZ (const Frame* undistorted, int r, int c, float& x, float& y, float& z) const;
+  void distort(int mx, int my, float& dx, float& dy) const;
+  void depth_to_color(float mx, float my, float& rx, float& ry) const;
+
+private:
+  Freenect2Device::IrCameraParams depth;    ///< Depth camera parameters.
+  Freenect2Device::ColorCameraParams color; ///< Color camera parameters.
+
+  const int filter_width_half;
+  const int filter_height_half;
+  const float filter_tolerance;
+
+  static const size_t MONO_IMAGE_SIZE = MONO_COLS * MONO_ROWS;
+
+  size_t block_size;
+  size_t grid_size;
+
+  bool good;                                // Memory correctly allocated
+
+  // Maps
+  int* d_distort_map;
+  float* d_depth_to_color_map_x;
+  float* d_depth_to_color_map_y;
+  int* d_depth_to_color_map_yi;
+
+  bool allocateDeviceMemory()
+  {
+    CHECK_CUDA(cudaMalloc(&d_distort_map, MONO_IMAGE_SIZE * sizeof(int)));
+    CHECK_CUDA(cudaMalloc(&d_depth_to_color_map_x, MONO_IMAGE_SIZE * sizeof(float)));
+    CHECK_CUDA(cudaMalloc(&d_depth_to_color_map_y, MONO_IMAGE_SIZE * sizeof(float)));
+    CHECK_CUDA(cudaMalloc(&d_depth_to_color_map_yi, MONO_IMAGE_SIZE * sizeof(int)));
+
+    cudaDeviceSynchronize();
+
+    CHECK_CUDA(cudaGetLastError());
+    return true;
+  }
+
+  bool setupDevice()
+  {
+    // Continue to use same device than cuda_depth_packet_processor?
+    if (!allocateDeviceMemory())
+      return false;
+
+    return true;
+  }
+
+  bool initMaps()
+  {
+    dInitMaps<<<grid_size, block_size>>>(d_distort_map, d_depth_to_color_map_x,
+      d_depth_to_color_map_y, d_depth_to_color_map_yi,
+      depth, color, depth_q, color_q);
+
+    cudaDeviceSynchronize();
+    CHECK_CUDA(cudaGetLastError());
+
+    return true;
+  }
+
+  void freeDeviceMemory()
+  {
+    CALL_CUDA(cudaFree(d_distort_map));
+    CALL_CUDA(cudaFree(d_depth_to_color_map_x));
+    CALL_CUDA(cudaFree(d_depth_to_color_map_y));
+    CALL_CUDA(cudaFree(d_depth_to_color_map_yi));
+  }
+};
+
+CudaRegistration::CudaRegistration(Freenect2Device::IrCameraParams depth_p, Freenect2Device::ColorCameraParams rgb_p):
+  impl_(new CudaRegistrationImpl(depth_p, rgb_p)) {}
+
+CudaRegistration::~CudaRegistration()
+{
+  delete impl_;
+}
+
+bool CudaRegistration::apply(const Frame* rgb, const Frame* depth, CudaDeviceFrame* undistorted, CudaDeviceFrame* registered, const bool enable_filter, CudaDeviceFrame* bigdepth, int* color_depth_map) const
+{
+  return impl_->apply(rgb, depth, undistorted, registered, enable_filter, bigdepth, color_depth_map);
+}
+
+bool CudaRegistrationImpl::apply(const Frame *rgb, const Frame *depth, CudaDeviceFrame *undistorted, CudaDeviceFrame *registered, const bool enable_filter, CudaDeviceFrame *bigdepth, int *color_depth_map) const
+{
+  // Check if all frames are valid and have the correct size
+  if (!rgb || !depth || !undistorted || !registered ||
+    rgb->width != 1920 || rgb->height != 1080 || rgb->bytes_per_pixel != 4 ||
+    depth->width != 512 || depth->height != 424 || depth->bytes_per_pixel != 4 ||
+    undistorted->width != 512 || undistorted->height != 424 || undistorted->bytes_per_pixel != 4 ||
+    registered->width != 512 || registered->height != 424 || registered->bytes_per_pixel != 4)
+  {
+    LOG_ERROR << "Not applying" << std::endl;
+    return false;
+  }
+
+  // Setup memory
+
+  float *d_depth_data;
+  size_t depth_size = depth->width * depth->height * sizeof(float);
+  unsigned int *d_rgb_data;
+  size_t rgb_size = rgb->width * rgb->height * sizeof(unsigned int);
+
+  CHECK_CUDA(cudaMalloc(&d_depth_data, depth_size));
+  cudaMemcpy((void*)d_depth_data,
+             (const void*)depth->data, depth_size,
+             cudaMemcpyHostToDevice);
+
+  CHECK_CUDA(cudaMalloc(&d_rgb_data, rgb_size));
+  cudaMemcpy((void*)d_rgb_data,
+             (const void*)rgb->data, rgb_size,
+             cudaMemcpyHostToDevice);
+
+  cudaDeviceSynchronize();
+  CHECK_CUDA(cudaGetLastError());
+
+  float *d_undistorted_data = (float*)undistorted->data;
+  unsigned int *d_registered_data = (unsigned int*)registered->data;
+  const int *d_map_dist = d_distort_map;
+  const float *d_map_x = d_depth_to_color_map_x;
+  const int *d_map_yi = d_depth_to_color_map_yi;
+
+
+  // Setup parameters
+
+  const int size_depth = MONO_ROWS * MONO_COLS;
+  const int size_color = COLOR_ROWS * COLOR_COLS;
+  //const float color_cx = color.cx + 0.5f; // 0.5f added for later rounding
+
+  // size of filter map with a border of filter_height_half on top and bottom so that no check for borders is needed.
+  // since the color image is wide angle no border to the sides is needed.
+  const int size_filter_map = size_color + COLOR_COLS * filter_height_half * 2;
+  // offset to the important data
+  const int offset_filter_map = COLOR_COLS * filter_height_half;
+
+
+  // Auxiliary maps
+
+  // map for storing the min z values used for each color pixel
+  float *d_filter_map = NULL;
+  // pointer to the beginning of the important data
+  float *d_p_filter_map = NULL;
+
+  // map for storing the color offset for each depth pixel
+  int *d_depth_to_c_off;
+  CHECK_CUDA(cudaMalloc(&d_depth_to_c_off, size_depth * sizeof(int)));
+  if (color_depth_map)
+  {
+    // I don't know where this other color map could be coming from,
+    // so for the moment I will assume it is in host memory.
+    cudaMemcpy((void*)d_depth_to_c_off,
+               (const void*)color_depth_map, size_depth * sizeof(int),
+               cudaMemcpyHostToDevice);
+  }
+  //int *map_c_off = depth_to_c_off;
+
+  // initializing the depth_map with values outside of the Kinect2 range
+  if(enable_filter){
+    if(bigdepth)
+    {
+      d_filter_map = (float*)bigdepth->data;
+    }
+    else
+    {
+      CHECK_CUDA(cudaMalloc(&d_filter_map, size_filter_map * sizeof(float)));
+    }
+    d_p_filter_map = d_filter_map + offset_filter_map;		// works the same even on device
+
+    cudaMemsetFloat(d_filter_map, std::numeric_limits<float>::infinity(), size_filter_map);
+  }
+
+  /* Fix depth distortion, and compute pixel to use from 'rgb' based on depth measurement,
+   * stored as x/y offset in the rgb data.
+   */
+  undistort<<<size_depth/MONO_COLS, MONO_COLS>>>(d_depth_to_c_off,
+      d_undistorted_data, d_filter_map,
+      d_depth_data, d_map_dist,
+      d_map_x, d_map_yi,
+      this->depth, this->color, filter_width_half, filter_height_half, offset_filter_map, enable_filter);
+  if (enable_filter)
+  {
+    registerImageFiltered<<<size_depth/MONO_COLS, MONO_COLS>>>(d_registered_data,
+        d_rgb_data,
+        d_depth_to_c_off,
+        d_undistorted_data,
+        d_p_filter_map,
+        filter_tolerance);
+    if (!bigdepth)
+    {
+      CALL_CUDA(cudaFree(d_filter_map));
+    }
+  }
+  else
+  {
+    registerImage<<<size_depth/MONO_COLS, MONO_COLS>>>(d_registered_data,
+        d_rgb_data,
+        d_depth_to_c_off);
+  }
+
+  // Finish
+
+  // -1 represents Invalid
+  //undistorted->format = undistorted->Float;
+  //registered->format = registered->BGRX;
+
+
+  if (color_depth_map)
+  {
+    // I don't know where this other color map could be coming from,
+    // so for the moment I will assume it is in host memory.
+    // Placing it back to where it came from
+    cudaMemcpy((void*)color_depth_map,
+               (const void*)d_depth_to_c_off, size_depth * sizeof(int),
+               cudaMemcpyDeviceToHost);
+  }
+  CALL_CUDA(cudaFree(d_depth_to_c_off));
+
+  CALL_CUDA(cudaFree(d_depth_data));
+  CALL_CUDA(cudaFree(d_rgb_data));
+
+  return true;
+}
+
+} /* namespace libfreenect2 */
diff --git a/src/opencl_depth_packet_processor.cpp b/src/opencl_depth_packet_processor.cpp
index d8db14d3c..c7f2a6953 100644
--- a/src/opencl_depth_packet_processor.cpp
+++ b/src/opencl_depth_packet_processor.cpp
@@ -251,7 +251,7 @@ class OpenCLDepthPacketProcessorImpl: public WithPerfLogging
     newIrFrame();
     newDepthFrame();
 
-    const int CL_ICDL_VERSION = 2;
+    //const int CL_ICDL_VERSION = 2; // Commented out because it is defined somewhere else
     typedef cl_int (*icdloader_func)(int, size_t, void*, size_t*);
 #ifdef _MSC_VER
 #pragma warning(push)
diff --git a/src/opencl_kde_depth_packet_processor.cpp b/src/opencl_kde_depth_packet_processor.cpp
index 98dbdff54..97765f415 100644
--- a/src/opencl_kde_depth_packet_processor.cpp
+++ b/src/opencl_kde_depth_packet_processor.cpp
@@ -259,7 +259,7 @@ class OpenCLKdeDepthPacketProcessorImpl: public WithPerfLogging
     newIrFrame();
     newDepthFrame();
 
-    const int CL_ICDL_VERSION = 2;
+    //const int cl_icdl_version = 2;   // CL_ICDL_VERSION is defined somewhere else
     typedef cl_int (*icdloader_func)(int, size_t, void*, size_t*);
 #ifdef _MSC_VER
 #pragma warning(push)
@@ -277,6 +277,7 @@ class OpenCLKdeDepthPacketProcessorImpl: public WithPerfLogging
     if (clGetICDLoaderInfoOCLICD != NULL)
     {
       char buf[16];
+      //if (clGetICDLoaderInfoOCLICD(cl_icdl_version, sizeof(buf), buf, NULL) == CL_SUCCESS)
       if (clGetICDLoaderInfoOCLICD(CL_ICDL_VERSION, sizeof(buf), buf, NULL) == CL_SUCCESS)
       {
         if (strcmp(buf, "2.2.4") < 0)
diff --git a/src/registration.cpp b/src/registration.cpp
index 49a3b03e0..b4a9ddd07 100644
--- a/src/registration.cpp
+++ b/src/registration.cpp
@@ -380,7 +380,7 @@ RegistrationImpl::RegistrationImpl(Freenect2Device::IrCameraParams depth_p, Free
 
   for (int y = 0; y < 424; y++) {
     for (int x = 0; x < 512; x++) {
-      // compute the dirstored coordinate for current pixel
+      // compute the distorted coordinate for current pixel
       distort(x,y,mx,my);
       // rounding the values and check if the pixel is inside the image
       ix = (int)(mx + 0.5f);
@@ -388,7 +388,7 @@ RegistrationImpl::RegistrationImpl(Freenect2Device::IrCameraParams depth_p, Free
       if(ix < 0 || ix >= 512 || iy < 0 || iy >= 424)
         index = -1;
       else
-        // computing the index from the coordianted for faster access to the data
+        // computing the index from the coordinates for faster access to the data
         index = iy * 512 + ix;
       *map_dist++ = index;