NestedTensorImpl.cpp
#include <ATen/ATen.h>
#include <ATen/NamedTensorUtils.h>
#include <ATen/WrapDimUtils.h>
#include <ATen/core/op_registration/op_registration.h>
#include <ATen/NestedTensorImpl.h>
#include <c10/core/DispatchKey.h>
namespace at {
namespace native {
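// Computes the "opt sizes" for a nested tensor from its (ntensors x dim)
// nested size matrix: entry 0 is the number of constituent tensors, and each
// following entry is the size that dimension shares across all constituents,
// or -1 if the constituents disagree (i.e. the dimension is ragged). For
// example, constituents of shapes (2, 3) and (4, 3) give the size matrix
// [[2, 3], [4, 3]] and opt sizes {2, -1, 3}.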
inline std::vector<int64_t> construct_opt_sizes(const at::Tensor& sizes) {
// torch.tensor([]) is considered to have `dim() = 1` and `size(0) = 0`
// torch.nested_tensor([]) should also have `dim() = 1` and `size(0) = 0`
if (sizes.dim() == 0) {
return std::vector<int64_t>({0});
}
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(sizes.dim() == 2);
std::vector<int64_t> result(1, sizes.sizes()[0]);
if (sizes.dim() > 0) {
size_t nested_dim = result.size();
int64_t* sizes_ptr = sizes.data_ptr<int64_t>();
result.resize(nested_dim + sizes.sizes()[1]);
int64_t sizes_size_0 = sizes.sizes()[0];
int64_t sizes_size_1 = sizes.sizes()[1];
for (const auto i : c10::irange(sizes_size_1)) {
result[nested_dim + i] = sizes_ptr[i];
}
for (const auto j : c10::irange(sizes_size_1)) {
for (const auto i : c10::irange(sizes_size_0)) {
if (result[nested_dim + j] &&
(result[nested_dim + j] != sizes_ptr[i * sizes.size(1) + j])) {
result[nested_dim + j] = -1;
}
}
}
}
return result;
}
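// Builds a strides matrix with the same shape as `sizes` (ntensors x dim),
// where row i holds the row-major (C-contiguous) strides of constituent i,
// computed as a right-to-left running product over that row of `sizes`.
// For example, a size row [2, 3] yields the stride row [3, 1].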
// Assuming contiguous memory, we can construct the strides from the sizes
inline at::Tensor construct_nested_stride_tensor(const at::Tensor& sizes) {
// empty `sizes` means empty nested tensor, so return empty strides
if (sizes.dim() == 0) {
return sizes;
}
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(sizes.dim() == 2);
int64_t orig_dim = sizes.size(1);
// `sizes`.sizes() == [ntensors, 0] means each constituent is 0-dimensional,
// so `sizes` is empty but shaped; in that case the strides tensor is also
// empty but shaped
if (orig_dim == 0) {
return sizes;
}
at::Tensor strides = sizes.new_empty(sizes.sizes());
const int64_t* sizes_ptr = sizes.data_ptr<int64_t>();
int64_t* strides_ptr = strides.data_ptr<int64_t>();
for (int64_t i = 0; i < sizes.size(0); i++) {
strides_ptr[orig_dim - 1] = 1;
int64_t product = sizes_ptr[orig_dim - 1];
for (int64_t j = orig_dim - 2; j >= 0; j--) {
strides_ptr[j] = product;
product *= sizes_ptr[j];
}
sizes_ptr += orig_dim;
strides_ptr += orig_dim;
}
return strides;
}
// [Note: Nested Tensor Autograd] The Nested Tensor key is a functionality
// key and therefore getAutogradRelatedKeySetFromBackend will return the
// wrong autograd key. For this specific impl we make sure to register the
// correct Autograd key which is AutogradNestedTensor
c10::DispatchKeySet generate_nested_key_set(at::Tensor buffer) {
c10::DispatchKeySet key_set =
(c10::DispatchKeySet(DispatchKey::NestedTensor) |
c10::DispatchKeySet(
buffer.is_cuda() ? BackendComponent::CUDABit
: BackendComponent::CPUBit));
// Add AutogradNestedTensor specific keys
key_set = key_set | inplace_or_view_ks | autograd_nested;
return key_set;
}
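// Constructs the nested tensor metadata from `buffer`, which holds the
// constituents' data (laid out back to back; contiguity is assumed throughout
// this file), and `nested_size_tensor`, a contiguous int64 matrix of shape
// (ntensors x dim) listing each constituent's shape (or a 0-dim tensor for an
// empty nested tensor). The strides matrix and opt sizes are derived from the
// size matrix by the helpers above.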
NestedTensorImpl::NestedTensorImpl(
at::Tensor buffer,
at::Tensor nested_size_tensor)
: TensorImpl(
generate_nested_key_set(buffer),
buffer.dtype(),
buffer.device()),
buffer_(std::move(buffer)),
nested_size_tensor_(std::move(nested_size_tensor)),
nested_stride_tensor_(construct_nested_stride_tensor(nested_size_tensor_)),
opt_sizes_(construct_opt_sizes(nested_size_tensor_))
{
TORCH_WARN_ONCE(
"The PyTorch API of nested tensors is in prototype stage and will change "
"in the near future.");
TORCH_INTERNAL_ASSERT(buffer_.is_cuda() || buffer_.is_cpu(), "NestedTensorImpl buffer must be either CUDA or CPU but got ", buffer_);
TORCH_INTERNAL_ASSERT(nested_size_tensor_.is_contiguous());
int64_t size_dim = nested_size_tensor_.dim();
TORCH_INTERNAL_ASSERT(size_dim == 0 || size_dim == 2);
refresh_dim();
set_sizes_strides_policy(c10::TensorImpl::SizesStridesPolicy::CustomSizes);
}
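// The logical dim of a nested tensor is 1 (the batch/nested dimension) plus
// the dim of each constituent, i.e. nested_size_tensor_.sizes()[1] + 1; an
// empty nested tensor has dim 1.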
void NestedTensorImpl::refresh_dim() {
const auto my_dim = nested_size_tensor_.dim() ? nested_size_tensor_.sizes()[1] + 1 : 1;
sizes_and_strides_.resize(my_dim);
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(dim() == my_dim);
}
int64_t NestedTensorImpl::dim_custom() const {
return dim_default();
}
// Currently sizes and strides assume contiguous
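// numel is the sum over constituents of the product of each row of
// nested_size_tensor_, accumulated in uint64_t and checked against both
// INT64_MAX and SIZE_MAX so that neither the per-row multiplication nor the
// running sum can silently overflow.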
int64_t NestedTensorImpl::numel_custom() const {
if (nested_size_tensor_.dim() == 0) {
return 0;
}
constexpr auto numel_max = std::min(
static_cast<uint64_t>(std::numeric_limits<int64_t>::max()),
static_cast<uint64_t>(std::numeric_limits<size_t>::max()));
const auto nt_dim = nested_size_tensor_.size(1);
const int64_t* sizes_ptr = nested_size_tensor_.data_ptr<int64_t>();
uint64_t num_elements{0};
for (const auto i : c10::irange(nested_size_tensor_.size(0))) {
uint64_t n = 1;
const auto start{sizes_ptr + i * nt_dim};
const auto end{start + nt_dim};
bool overflows = c10::safe_multiplies_u64(start, end, &n);
num_elements += n;
overflows |= (num_elements > numel_max);
TORCH_CHECK(!overflows, "numel: integer multiplication overflow");
}
return static_cast<int64_t>(num_elements);
}
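// A nested tensor has no single well-defined shape, so the generic
// is_contiguous / sizes / strides queries are disabled rather than returning
// something misleading; callers are expected to consult the nested size and
// stride tensors instead.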
bool NestedTensorImpl::is_contiguous_custom(MemoryFormat) const {
TORCH_CHECK(false, "is_contiguous is disabled.");
}
IntArrayRef NestedTensorImpl::sizes_custom() const {
TORCH_CHECK(false, "Internal error: NestedTensorImpl doesn't support sizes. Please file an issue on https://github.com/pytorch/nestedtensor");
}
c10::SymIntArrayRef NestedTensorImpl::sym_sizes_custom() const {
TORCH_CHECK(false, "Internal error: NestedTensorImpl doesn't support sizes. Please file an issue on https://github.com/pytorch/nestedtensor");
}
c10::SymIntArrayRef NestedTensorImpl::sym_sizes() const {
return sym_sizes_custom();
}
IntArrayRef NestedTensorImpl::strides_custom() const {
TORCH_CHECK(false, "Internal error: NestedTensorImpl doesn't support strides. Please file an issue on https://github.com/pytorch/nestedtensor");
}
const char* NestedTensorImpl::tensorimpl_type_name() const {
return "NestedTensorImpl";
}
} // namespace native
} // namespace at