0001-Patch-for-enabling-PIM-on-pytorch.patch
From 3c887ff48b93c38afbbb28dfbc882c2881336e30 Mon Sep 17 00:00:00 2001
From: Sudhakar M K <[email protected]>
Date: Fri, 28 May 2021 13:01:24 +0000
Subject: [PATCH] Patch for enabling PIM on pytorch

Details of patches:
- Add PIMGemv for hidden gemv operation of LSTM
- Enable bundle creation in PIMGemv
- Add condition for PIM Gemv execution
- Removed bundle API calls
- Add fp16 path for miopen
- Change PimGemv invoke condition for ATEN-PIM path
- Remove PimInit/Deinit call from MIOpen library
- Update LSTM to use optimal kernel
- Change PimExecuteGemv to PimExecuteGemm
Contributors:
Sudhakar M K <[email protected]>
Hyeonsu Kim <[email protected]>
Amit Yadav <[email protected]>
sundeep.k <[email protected]>
Hosang Yoon <[email protected]>
yao01.xiao <[email protected]>
---
aten/src/ATen/CMakeLists.txt | 1 +
aten/src/ATen/native/RNN.cpp | 97 +++++++++++++++++++++++++++++++++++-
cmake/Dependencies.cmake | 3 +-
cmake/public/LoadHIP.cmake | 4 ++
4 files changed, 102 insertions(+), 3 deletions(-)
diff --git a/aten/src/ATen/CMakeLists.txt b/aten/src/ATen/CMakeLists.txt
index 114d970bf7..f735fc35e9 100644
--- a/aten/src/ATen/CMakeLists.txt
+++ b/aten/src/ATen/CMakeLists.txt
@@ -439,6 +439,7 @@ if(USE_ROCM)
# NB: Instead of adding it to this list, we add it by hand
# to caffe2_hip, because it needs to be a PRIVATE dependency
# list(APPEND ATen_HIP_DEPENDENCY_LIBS ATEN_CUDA_FILES_GEN_LIB)
+ # list(APPEND ATen_HIP_DEPENDENCY_LIBS PimRuntime)
endif()
set(ATEN_INCLUDE_DIR "${CMAKE_INSTALL_PREFIX}/${AT_INSTALL_INCLUDE_DIR}")
diff --git a/aten/src/ATen/native/RNN.cpp b/aten/src/ATen/native/RNN.cpp
index c30bb6f3f4..d5380292c0 100644
--- a/aten/src/ATen/native/RNN.cpp
+++ b/aten/src/ATen/native/RNN.cpp
@@ -9,6 +9,9 @@
#include <ATen/native/quantized/cpu/qnnpack_utils.h>
#include <torch/custom_class.h>
#include <torch/library.h>
+#include <pim_runtime_api.h>
+#include <half.hpp>
+#include <hip/hip_runtime.h>
torch::class_<LinearPackedParamsBase> register_linear_params();
@@ -16,13 +19,67 @@ namespace at { namespace native {
namespace {
+Tensor PimGemm(const Tensor& h, const Tensor& wt)
+{
+ uint32_t batch = 1;
+ uint32_t channel = 1;
+ uint32_t h_size = h.size(0);
+ uint32_t in_size = h.size(1);
+ uint32_t out_size = wt.size(1);
+ uint32_t w_size = in_size * out_size;
+
+ PimGemmDesc *pim_desc = PimCreateGemmDesc(batch, channel, h_size, in_size, h_size, out_size, PIM_FP16, I_X_W);
+ PimBo *dev_in = PimCreateBo(pim_desc, MEM_TYPE_DEVICE, GEMM_INPUT, (void*)h.data_ptr());
+ uint64_t w_addr = reinterpret_cast<uint64_t>(wt.data_ptr());
+ PimBo *dev_weight = PimCreateBo(pim_desc, MEM_TYPE_DEVICE, GEMM_WEIGHT, reinterpret_cast<void*>(w_addr), true);
+
+ // or one can do this way:
+ // PimBo* host_weight = PimCreateBo(pim_desc, MEM_TYPE_HOST, GEMV_WEIGHT);
+ // hipMemcpy(host_weight->data, reinterpret_cast<void*>(w_addr), w_size * sizeof(half_float::half), hipMemcpyDeviceToHost);
+
+ auto output = at::empty({1, wt.size(1)}, h.options());
+ PimBo *dev_out = PimCreateBo(pim_desc, MEM_TYPE_DEVICE, GEMM_OUTPUT, (void*)output.data_ptr());
+
+ PimExecuteGemm(dev_out, dev_in, dev_weight, nullptr, PimActFunc::NONE, I_X_W, nullptr);
+
+ PimDestroyGemmDesc(pim_desc);
+ PimDestroyBo(dev_in);
+ PimDestroyBo(dev_weight);
+ PimDestroyBo(dev_out);
+
+ return output;
+}
+
// Check if pytorch is compiled with MIOpen.
bool use_miopen(const at::Tensor& input, const double dropout_state) {
- bool is_miopen_acceptable = ((input.scalar_type() == at::kFloat)|| (input.scalar_type() == at::kHalf)) &&
+
+ char env_p, *ptr;
+ if(std::getenv("ENABLE_MIOPEN_PYTORCH")) {
+ ptr = std::getenv("ENABLE_MIOPEN_PYTORCH");
+ env_p = *ptr;
+ }
+ else{
+ env_p = '0';
+ }
+ if(env_p == '0')
+ return false;
+
+ bool is_miopen_acceptable = (input.scalar_type() == at::kFloat || input.scalar_type() == at::kHalf) &&
(detail::getCUDAHooks().compiledWithMIOpen()) &&
(input.is_cuda()) &&
(dropout_state == 0.0) &&
(at::globalContext().userEnabledCuDNN());
+
+ int in_is_vector = 0;
+ for (int i = 0; i < input.dim(); ++i) {
+ if (input.size(i) != 1) {
+ in_is_vector += 1;
+ }
+ }
+ if (in_is_vector == 1) {
+ is_miopen_acceptable = false;
+ }
+
return is_miopen_acceptable;
}
@@ -105,9 +162,47 @@ struct CellParams : public CellParamsBase {
const Tensor& w_hr; /* only defined for LSTMs with projections */
Tensor matmul_ih(const Tensor& input) const override {
+ char env_p, *ptr;
+
+ if(std::getenv("ENABLE_PIM")) {
+ ptr = std::getenv("ENABLE_PIM");
+ env_p = *ptr;
+ }
+ else
+ env_p = '0';
+
+ if(env_p == '1') {
+ auto size_vec = input.sizes().vec();
+ auto is_vector = std::count_if(size_vec.begin(), size_vec.end(), [](int64_t item) {
+ return item != 1;
+ });
+ if (is_vector == 1) {
+ return PimGemm(input, w_ih.t());
+ }
+ }
+
return at::matmul(input, w_ih.t());
}
Tensor matmul_hh(const Tensor& h) const override {
+ char env_p, *ptr;
+
+ if(std::getenv("ENABLE_PIM")) {
+ ptr = std::getenv("ENABLE_PIM");
+ env_p = *ptr;
+ }
+ else
+ env_p = '0';
+
+ if(env_p == '1') {
+ auto size_vec = h.sizes().vec();
+ auto is_vector = std::count_if(size_vec.begin(), size_vec.end(), [](int64_t item) {
+ return item != 1;
+ });
+ if (is_vector == 1) {
+ return PimGemm(h, w_hh.t());
+ }
+ }
+
return at::matmul(h, w_hh.t());
}
Tensor matmul_hr(const Tensor& h) const override {
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
index ca560288a4..bc47c518f1 100644
--- a/cmake/Dependencies.cmake
+++ b/cmake/Dependencies.cmake
@@ -1283,8 +1283,7 @@ if(USE_ROCM)
# This is needed for library added by hip_add_library (same for hip_add_executable)
hip_include_directories(${Caffe2_HIP_INCLUDE})
- set(Caffe2_PUBLIC_HIP_DEPENDENCY_LIBS
- ${PYTORCH_HIP_HCC_LIBRARIES} ${PYTORCH_MIOPEN_LIBRARIES} ${PYTORCH_RCCL_LIBRARIES} ${hipcub_LIBRARIES} ${ROCM_HIPRTC_LIB} ${ROCM_ROCTX_LIB})
+ set(Caffe2_PUBLIC_HIP_DEPENDENCY_LIBS ${PYTORCH_HIP_HCC_LIBRARIES} ${PYTORCH_MIOPEN_LIBRARIES} ${PYTORCH_RCCL_LIBRARIES} ${hipcub_LIBRARIES} ${ROCM_HIPRTC_LIB} ${ROCM_ROCTX_LIB} ${ROCM_PIM_LIB})
# Note [rocblas & rocfft cmake bug]
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/cmake/public/LoadHIP.cmake b/cmake/public/LoadHIP.cmake
index 85ec5b4ad0..46a957d644 100644
--- a/cmake/public/LoadHIP.cmake
+++ b/cmake/public/LoadHIP.cmake
@@ -139,6 +139,9 @@ endif()
# Add HIP to the CMAKE Module Path
set(CMAKE_MODULE_PATH ${HIP_PATH}/cmake ${CMAKE_MODULE_PATH})
+# Disable Asserts In Code (Can't use asserts on HIP stack.)
+#add_definitions(-DNDEBUG)
+
macro(find_package_and_print_version PACKAGE_NAME)
find_package("${PACKAGE_NAME}" ${ARGN})
message("${PACKAGE_NAME} VERSION: ${${PACKAGE_NAME}_VERSION}")
@@ -251,5 +254,6 @@ if(HIP_FOUND)
find_library(ROCM_HIPRTC_LIB ${hip_library_name} HINTS ${HIP_PATH}/lib)
# roctx is part of roctracer
find_library(ROCM_ROCTX_LIB roctx64 HINTS ${ROCTRACER_PATH}/lib)
+ find_library(ROCM_PIM_LIB PimRuntime HINTS ${ROCM_PATH}/lib)
set(roctracer_INCLUDE_DIRS ${ROCTRACER_PATH}/include)
endif()
--
2.17.1
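
Note (not part of the patch): the changes above gate the new code paths entirely through environment variables and tensor-shape checks. use_miopen() returns false unless ENABLE_MIOPEN_PYTORCH is set to a non-zero value, and it also rejects GEMV-shaped inputs so they reach the ATen cell path; CellParams::matmul_ih / matmul_hh then call PimGemm only when ENABLE_PIM is '1' and the operand has exactly one non-unit dimension. The sketch below is an illustrative usage example, assuming PyTorch was built with this patch against a ROCm stack that provides the PimRuntime library and that the PIM device is initialized elsewhere (PimInit/Deinit were removed from MIOpen per the commit message); it shows how a batch-1 FP16 LSTM step ends up GEMV-shaped so the hidden and input matmuls take the PIM route.

import os

# Must be set before the patched RNN code calls std::getenv().
os.environ["ENABLE_PIM"] = "1"              # route GEMV-shaped cell matmuls to PimGemm
os.environ["ENABLE_MIOPEN_PYTORCH"] = "0"   # keep the fused MIOpen RNN path disabled

import torch

# Assumption: this build exposes the ROCm GPU as the "cuda" device and links PimRuntime.
device = torch.device("cuda")

lstm = torch.nn.LSTM(input_size=1024, hidden_size=1024).to(device).half()

# seq_len=1, batch=1: each per-step operand is a (1, 1024) FP16 tensor with exactly
# one non-unit dimension, which is the condition matmul_ih/matmul_hh check before
# calling PimGemm (PIM_FP16, I_X_W layout).
x = torch.randn(1, 1, 1024, dtype=torch.float16, device=device)
out, (h, c) = lstm(x)

With ENABLE_PIM unset or set to any value other than '1', the cell matmuls fall through to the stock at::matmul path, so the patch is effectively a no-op by default.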