Skip to content

Commit

Permalink
[AutoParallel] Polish dist tensor design (#56368)
Browse files Browse the repository at this point in the history
* polish dist tensor design

* adjust constructor

* polish details

* polish details design

* fix compile error

* refactor init tensor impl

* fix reshard test

* polish details

* add unittest for coverage
  • Loading branch information
chenwhql authored Aug 22, 2023
1 parent ffff3da commit 8495377
Show file tree
Hide file tree
Showing 26 changed files with 326 additions and 318 deletions.
14 changes: 7 additions & 7 deletions paddle/fluid/eager/grad_node_info.cc
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ void GradNodeBase::SetGradInMeta(const paddle::Tensor& fwd_out,
return;
}

phi::DenseTensor* dense_tensor = nullptr;
const phi::DenseTensor* dense_tensor = nullptr;
// Record TensorMeta
if (phi::DenseTensor::classof(fwd_out.impl().get())) {
// Only Copy Meta
Expand All @@ -130,8 +130,8 @@ void GradNodeBase::SetGradInMeta(const paddle::Tensor& fwd_out,
// TODO(chenweihang): DistTensor contains global and local meta, here
// only set the local meta now, we should set global meta later
dense_tensor =
static_cast<phi::distributed::DistTensor*>(fwd_out.impl().get())
->mutable_value();
&(static_cast<phi::distributed::DistTensor*>(fwd_out.impl().get())
->value());
#endif
} else {
VLOG(7) << "Unable to initialize the DenseTensorMeta of GradSlotMeta with "
Expand Down Expand Up @@ -270,16 +270,16 @@ void GradNodeBase::SetGradOutMeta(const paddle::Tensor& fwd_in,
meta.SetPlace(fwd_in.place());
#ifdef PADDLE_WITH_DISTRIBUTE
} else if (phi::distributed::DistTensor::classof(fwd_in.impl().get())) {
phi::DenseTensor* dense_tensor =
const phi::DenseTensor& dense_tensor =
static_cast<phi::distributed::DistTensor*>(fwd_in.impl().get())
->mutable_value();
->value();
PADDLE_ENFORCE_NE(
dense_tensor->meta().dtype,
dense_tensor.meta().dtype,
phi::DataType::UNDEFINED,
paddle::platform::errors::Fatal("Attempting to copy DenseTensorMeta "
"with phi::DataType::UNDEFINED,"
"which is illegal."));
meta.SetTensorMeta(dense_tensor->meta());
meta.SetTensorMeta(dense_tensor.meta());
meta.SetPlace(fwd_in.place());
#endif
} else {
Expand Down
7 changes: 2 additions & 5 deletions paddle/fluid/eager/grad_tensor_holder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -94,12 +94,9 @@ void GradTensorHolder::CopyValueFromTensor(size_t slot_id,
// TODO(chenweihang): replace by valid dist_attr later
auto temp =
paddle::experimental::full(t.shape(), 1, t.dtype(), t.place());
auto dense_temp =
std::dynamic_pointer_cast<phi::DenseTensor>(temp.impl());
auto dense_temp = static_cast<phi::DenseTensor*>(temp.impl().get());
auto dist_tensor = std::make_shared<phi::distributed::DistTensor>(
dense_temp,
dense_temp->meta(),
std::make_shared<phi::distributed::TensorDistAttr>());
*dense_temp, phi::distributed::TensorDistAttr());
temp.set_impl(dist_tensor);
buffer_[slot_id][rank] = temp;
#endif
Expand Down
9 changes: 3 additions & 6 deletions paddle/fluid/pybind/auto_parallel_py.cc
Original file line number Diff line number Diff line change
Expand Up @@ -121,8 +121,7 @@ void BindAutoParallel(py::module *m) {
"is_suitable",
[](phi::distributed::ReshardFunction &self,
py::handle py_tensor,
const std::shared_ptr<phi::distributed::TensorDistAttr>
&dist_attr) {
const phi::distributed::TensorDistAttr &dist_attr) {
auto tensor = CastPyArg2Tensor(py_tensor.ptr(), 0);
auto p_dist =
std::dynamic_pointer_cast<phi::distributed::DistTensor>(
Expand All @@ -135,8 +134,7 @@ void BindAutoParallel(py::module *m) {
[](phi::distributed::ReshardFunction &self,
phi::DeviceContext *dev_ctx,
py::handle py_tensor,
const std::shared_ptr<phi::distributed::TensorDistAttr>
&dist_attr) {
const phi::distributed::TensorDistAttr &dist_attr) {
auto tensor = CastPyArg2Tensor(py_tensor.ptr(), 0);
auto p_dist =
std::dynamic_pointer_cast<phi::distributed::DistTensor>(
Expand Down Expand Up @@ -281,8 +279,7 @@ void BindAutoParallel(py::module *m) {
py::arg("memo"))
.def("__str__", &DeviceMesh::to_string);

py::class_<TensorDistAttr, std::shared_ptr<TensorDistAttr>> py_dist_attr(
*m, "TensorDistAttr");
py::class_<TensorDistAttr> py_dist_attr(*m, "TensorDistAttr");
g_tensor_dist_attr_pytype =
reinterpret_cast<PyTypeObject *>(py_dist_attr.ptr());
py_dist_attr.def(py::init<>())
Expand Down
159 changes: 71 additions & 88 deletions paddle/fluid/pybind/eager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -68,52 +68,6 @@ PyObject* TensorNew(PyTypeObject* type, PyObject* args, PyObject* kwargs) {
return obj;
}

#ifdef PADDLE_WITH_DISTRIBUTE
void EmptyDistTensorInitializer(
TensorObject* self,
const std::string& name,
const paddle::platform::Place& place,
const std::shared_ptr<TensorDistAttr>& dist_attr,
bool persistable = false,
int stop_gradient = -1,
framework::proto::VarType::Type dtype =
paddle::framework::proto::VarType::FP32,
const std::vector<int>& dims = {0}) {
auto ddims = phi::make_ddim(dims);
self->tensor.set_name(name);
auto autograd_meta = egr::EagerUtils::autograd_meta(&(self->tensor));
autograd_meta->SetPersistable(persistable);
if (stop_gradient != -1) {
autograd_meta->SetStopGradient(static_cast<bool>(stop_gradient));
}

std::shared_ptr<DistTensor> dist_tensor = nullptr;
if (dims.size() == 1 && dims[0] == 0) {
std::shared_ptr<phi::Allocation> allocation_ptr = nullptr;
dist_tensor = std::make_shared<DistTensor>(
allocation_ptr,
phi::DenseTensorMeta(paddle::framework::TransToPhiDataType(dtype),
ddims),
dist_attr);
} else {
dist_tensor = std::make_shared<DistTensor>(
std::make_shared<phi::Allocation>(),
phi::DenseTensorMeta(paddle::framework::TransToPhiDataType(dtype),
ddims),
dist_attr);
}
self->tensor.set_impl(dist_tensor);

if (!autograd_meta->GetMutableGradNode()) {
autograd_meta->SetGradNode(
std::make_shared<egr::GradNodeAccumulation>(autograd_meta));
VLOG(3) << "Tensor(" << name
<< ") have not GradNode, add GradNodeAccumulation"
<< autograd_meta->GradNode() << " for it.";
}
}
#endif

// TODO(jiabin): Overload this once we need more constructor in Python
void EmptyTensorInitializer(TensorObject* self,
const std::string& name,
Expand Down Expand Up @@ -184,44 +138,71 @@ void EmptyStringTensorInitializer(TensorObject* self,
}

#ifdef PADDLE_WITH_DISTRIBUTE
void InitDistTensorWithNumpyValue(TensorObject* self,
const py::object& array,
const paddle::platform::Place& place,
bool zero_copy = false) {
PADDLE_ENFORCE_EQ(
self->tensor.defined(),
true,
paddle::platform::errors::Unavailable(
"Calling InitDistTensorWithNumpyValue of Eager Tensor without "
"EmptyDistTensorInitializer is "
"forbidden. Please check your code and make sure you new a "
"eager tensor before init it with NumPy."));
DistTensor* dist_tensor_ptr =
static_cast<DistTensor*>(self->tensor.impl().get());
phi::DenseTensor* impl_ptr =
static_cast<phi::DenseTensor*>(dist_tensor_ptr->mutable_value());
void CreateDistTensorWithNumpyValue(TensorObject* self,
const std::string& name,
const paddle::platform::Place& place,
const TensorDistAttr& dist_attr,
const py::object& array,
bool persistable = false,
int stop_gradient = -1,
bool zero_copy = false,
framework::proto::VarType::Type dtype =
paddle::framework::proto::VarType::FP32,
const std::vector<int>& dims = {0}) {
auto ddims = phi::make_ddim(dims);
self->tensor.set_name(name);
auto autograd_meta = egr::EagerUtils::autograd_meta(&(self->tensor));
autograd_meta->SetPersistable(persistable);
if (stop_gradient != -1) {
autograd_meta->SetStopGradient(static_cast<bool>(stop_gradient));
}

phi::DenseTensor dense_tensor;
if (dims.size() == 1 && dims[0] == 0) {
std::shared_ptr<phi::Allocation> allocation_ptr = nullptr;
dense_tensor = phi::DenseTensor(
nullptr,
phi::DenseTensorMeta(paddle::framework::TransToPhiDataType(dtype),
ddims));
} else {
dense_tensor = phi::DenseTensor(
std::make_shared<phi::Allocation>(),
phi::DenseTensorMeta(paddle::framework::TransToPhiDataType(dtype),
ddims));
}

if (platform::is_cpu_place(place)) {
SetTensorFromPyArray<platform::CPUPlace>(impl_ptr, array, place, zero_copy);
SetTensorFromPyArray<platform::CPUPlace>(
&dense_tensor, array, place, zero_copy);
} else if (platform::is_xpu_place(place)) {
SetTensorFromPyArray<platform::XPUPlace>(impl_ptr, array, place, zero_copy);
SetTensorFromPyArray<platform::XPUPlace>(
&dense_tensor, array, place, zero_copy);
} else if (platform::is_gpu_place(place)) {
SetTensorFromPyArray<platform::CUDAPlace>(
impl_ptr, array, place, zero_copy);
&dense_tensor, array, place, zero_copy);
} else if (platform::is_cuda_pinned_place(place)) {
SetTensorFromPyArray<platform::CUDAPinnedPlace>(
impl_ptr, array, place, zero_copy);
&dense_tensor, array, place, zero_copy);
} else if (platform::is_custom_place(place)) {
SetTensorFromPyArray<platform::CustomPlace>(
impl_ptr, array, place, zero_copy);
&dense_tensor, array, place, zero_copy);
} else {
PADDLE_THROW(platform::errors::InvalidArgument(
"Place should be one of "
"CPUPlace/XPUPlace/CUDAPlace/CUDAPinnedPlace/CustomPlace"));
}

// TODO(dev): dist_tensor meta is not equal to dense tensor meta
dist_tensor_ptr->set_meta(impl_ptr->meta());
auto dist_tensor =
std::make_shared<phi::distributed::DistTensor>(dense_tensor, dist_attr);
self->tensor.set_impl(dist_tensor);

if (!autograd_meta->GetMutableGradNode()) {
autograd_meta->SetGradNode(
std::make_shared<egr::GradNodeAccumulation>(autograd_meta));
VLOG(3) << "Tensor(" << name
<< ") have not GradNode, add GradNodeAccumulation"
<< autograd_meta->GradNode() << " for it.";
}
}
#endif

Expand Down Expand Up @@ -284,28 +265,25 @@ void InitStringTensorWithNumpyValue(TensorObject* self, const py::object& obj) {
}

#ifdef PADDLE_WITH_DISTRIBUTE
void InitDistTensorWithTensor(
TensorObject* self,
const paddle::Tensor& src,
const paddle::platform::Place& place,
const std::string& name,
const std::shared_ptr<TensorDistAttr>& dist_attr) {
void InitDistTensorWithTensor(TensorObject* self,
const paddle::Tensor& src,
const paddle::platform::Place& place,
const std::string& name,
const TensorDistAttr& dist_attr) {
PADDLE_ENFORCE(src.is_dense_tensor(),
paddle::platform::errors::InvalidArgument(
"DistTensor can only initialize by DenseTensor"));
self->tensor.set_name(name);
if (place == src.place()) {
std::shared_ptr<phi::DenseTensor> tensor =
std::static_pointer_cast<phi::DenseTensor>(src.impl());
self->tensor.set_impl(
std::make_shared<DistTensor>(tensor, tensor->meta(), dist_attr));
self->tensor.set_impl(std::make_shared<DistTensor>(*tensor, dist_attr));
VLOG(4) << "Same place, do ShareDataWith for DistTensor.";
} else {
std::shared_ptr<phi::DenseTensor> tensor =
std::static_pointer_cast<phi::DenseTensor>(
src.copy_to(place, true).impl());
self->tensor.set_impl(
std::make_shared<DistTensor>(tensor, tensor->meta(), dist_attr));
self->tensor.set_impl(std::make_shared<DistTensor>(*tensor, dist_attr));
VLOG(4) << "Different place, do TensorCopy for DistTensor.";
}
if (src.get_autograd_meta()) {
Expand Down Expand Up @@ -416,13 +394,13 @@ paddle::platform::Place ParsePlace(
}

#ifdef PADDLE_WITH_DISTRIBUTE
std::shared_ptr<TensorDistAttr> ParseDistAttrArgs(
TensorDistAttr ParseDistAttrArgs(
std::unordered_map<std::string, PyObject*> kws_map,
std::unordered_map<std::string, Py_ssize_t> kw_order_map,
PyObject* args,
bool flag_kwargs,
Py_ssize_t args_num) {
std::shared_ptr<TensorDistAttr> dist_attr = nullptr;
TensorDistAttr dist_attr;
if (kw_order_map["dist_attr"] <= args_num) {
dist_attr = CastPyArg2DistAttr(
PyTuple_GET_ITEM(args, kw_order_map["dist_attr"] - 1),
Expand Down Expand Up @@ -530,13 +508,18 @@ void AutoInitTensorByPyArray(TensorObject* py_tensor_ptr,
"stop_gradient", kws_map, kw_order_map, args, flag_kwargs, args_num);

#ifdef PADDLE_WITH_DISTRIBUTE
std::shared_ptr<TensorDistAttr> dist_attr =
TensorDistAttr dist_attr =
ParseDistAttrArgs(kws_map, kw_order_map, args, flag_kwargs, args_num);

if (dist_attr) {
EmptyDistTensorInitializer(
py_tensor_ptr, act_name, place, dist_attr, persistable, stop_gradient);
InitDistTensorWithNumpyValue(py_tensor_ptr, numpy_value, place, zero_copy);
if (!dist_attr.empty()) {
CreateDistTensorWithNumpyValue(py_tensor_ptr,
act_name,
place,
dist_attr,
numpy_value,
persistable,
stop_gradient,
zero_copy);
return;
}
#endif
Expand Down Expand Up @@ -572,7 +555,7 @@ void AutoInitTensorByTensor(TensorObject* py_tensor_ptr,
act_name = ParseName(kws_map, kw_order_map, args, flag_kwargs, args_num);

#ifdef PADDLE_WITH_DISTRIBUTE
std::shared_ptr<TensorDistAttr> dist_attr =
TensorDistAttr dist_attr =
ParseDistAttrArgs(kws_map, kw_order_map, args, flag_kwargs, args_num);
#endif

Expand All @@ -595,7 +578,7 @@ void AutoInitTensorByTensor(TensorObject* py_tensor_ptr,
}
}
#ifdef PADDLE_WITH_DISTRIBUTE
if (dist_attr) {
if (!dist_attr.empty()) {
InitDistTensorWithTensor(
py_tensor_ptr, src_tensor, place, act_name, dist_attr);
} else {
Expand Down
9 changes: 9 additions & 0 deletions paddle/fluid/pybind/eager_method.cc
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,15 @@ static PyObject* tensor_method_numpy(TensorObject* self,
return array;
}
auto tensor_dims = self->tensor.shape();
#ifdef PADDLE_WITH_DISTRIBUTE
// Now the DistTensor's numpy() return the local tensor value
if (self->tensor.is_dist_tensor()) {
tensor_dims = phi::vectorize(
static_cast<phi::distributed::DistTensor*>(self->tensor.impl().get())
->value()
.dims());
}
#endif
auto numpy_dtype = TensorDtype2NumpyDtype(self->tensor.type());
auto sizeof_dtype = phi::SizeOf(self->tensor.type());
Py_intptr_t py_dims[paddle::framework::DDim::kMaxRank]; // NOLINT
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/pybind/eager_properties.cc
Original file line number Diff line number Diff line change
Expand Up @@ -378,7 +378,7 @@ PyObject* tensor_properties_get_dist_attr(TensorObject* self, void* closure) {
#ifdef PADDLE_WITH_DISTRIBUTE
phi::distributed::DistTensor* dist_tensor =
static_cast<phi::distributed::DistTensor*>(self->tensor.impl().get());
return ToPyObject(dist_tensor->dist_attr().get());
return ToPyObject(&dist_tensor->dist_attr());
#else
RETURN_PY_NONE
#endif
Expand Down
5 changes: 2 additions & 3 deletions paddle/fluid/pybind/eager_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -547,11 +547,10 @@ platform::Place CastPyArg2Place(PyObject* obj, ssize_t arg_pos) {

#ifdef PADDLE_WITH_DISTRIBUTE
using phi::distributed::TensorDistAttr;
std::shared_ptr<TensorDistAttr> CastPyArg2DistAttr(PyObject* obj,
ssize_t arg_pos) {
TensorDistAttr CastPyArg2DistAttr(PyObject* obj, ssize_t arg_pos) {
if (PyObject_IsInstance(
obj, reinterpret_cast<PyObject*>(g_tensor_dist_attr_pytype))) {
return ::pybind11::handle(obj).cast<std::shared_ptr<TensorDistAttr>>();
return ::pybind11::handle(obj).cast<TensorDistAttr>();
} else {
PADDLE_THROW(platform::errors::InvalidArgument(
"argument (position %d) must be "
Expand Down
4 changes: 2 additions & 2 deletions paddle/fluid/pybind/eager_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -313,8 +313,8 @@ paddle::DataType CastPyArg2DataTypeDirectly(PyObject* obj,
ssize_t arg_pos);

#ifdef PADDLE_WITH_DISTRIBUTE
std::shared_ptr<phi::distributed::TensorDistAttr> CastPyArg2DistAttr(
PyObject* obj, ssize_t arg_pos);
phi::distributed::TensorDistAttr CastPyArg2DistAttr(PyObject* obj,
ssize_t arg_pos);
#endif

paddle::optional<paddle::Tensor> GetOptionalTensorFromArgs(
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/pybind/tensor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1029,7 +1029,7 @@ void BindTensor(pybind11::module &m) { // NOLINT
py::class_<DistTensor>(m, "DistTensor")
.def(
"get_tensor",
[](DistTensor &self) { return self.mutable_value(); },
[](DistTensor &self) { return self.value(); },
py::return_value_policy::reference)
.def("numel",
[](DistTensor &self) -> int64_t { return self.value().numel(); });
Expand Down
4 changes: 1 addition & 3 deletions paddle/phi/api/lib/api_gen_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -539,11 +539,9 @@ phi::distributed::DistTensor* SetKernelDistOutput(Tensor* out) {
if (out) {
// TODO(chenweihang): now all dist case are nullptr
if (out->impl() == nullptr) {
auto dense_t = std::make_shared<phi::DenseTensor>();
// TODO(chenweihang): polish code, dist_attr is null now
auto dist_attr = std::make_shared<phi::distributed::TensorDistAttr>();
auto dist_t = std::make_shared<phi::distributed::DistTensor>(
dense_t, phi::DenseTensorMeta(), dist_attr);
phi::DDim(), phi::distributed::TensorDistAttr());
out->set_impl(dist_t);
}
return static_cast<phi::distributed::DistTensor*>(out->impl().get());
Expand Down
Loading

0 comments on commit 8495377

Please sign in to comment.