【complex op】 No.10 add complex support for exp/expm1 #56398

Closed
wants to merge 14 commits into from
Changes from 10 commits
10 changes: 10 additions & 0 deletions paddle/phi/common/complex.h
@@ -466,6 +466,16 @@ HOSTDEVICE inline complex<T> tanh(const complex<T>& a) {
#endif
}

template <typename T>
Contributor:

A previous PR should already have added exp here; please sync with the latest code so it isn't added twice.

Contributor Author:

OK.

HOSTDEVICE inline complex<T> exp(const complex<T>& a) {
#if defined(PADDLE_WITH_CUDA_OR_HIP_COMPLEX) && \
(defined(__CUDA_ARCH__) || defined(__HIPCC__))
return complex<T>(thrust::exp(thrust::complex<T>(a)));
#else
return complex<T>(std::exp(std::complex<T>(a)));
#endif
}

template <typename T>
HOSTDEVICE inline complex<T> conj(const complex<T>& a) {
#if defined(PADDLE_WITH_CUDA_OR_HIP_COMPLEX) && \
8 changes: 6 additions & 2 deletions paddle/phi/kernels/cpu/activation_grad_kernel.cc
@@ -340,15 +340,19 @@ PD_REGISTER_KERNEL(exp_grad,
float,
double,
int,
int64_t) {}
int64_t,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {}

PD_REGISTER_KERNEL(expm1_grad,
CPU,
ALL_LAYOUT,
phi::Expm1GradKernel,
float,
double,
phi::dtype::float16) {}
phi::dtype::float16,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {}

PD_REGISTER_KERNEL(
logit_grad, CPU, ALL_LAYOUT, phi::LogitGradKernel, float, double) {}
8 changes: 6 additions & 2 deletions paddle/phi/kernels/cpu/activation_kernel.cc
@@ -211,7 +211,9 @@ PD_REGISTER_KERNEL(exp,
double,
int,
int64_t,
phi::dtype::float16) {}
phi::dtype::float16,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {}

PD_REGISTER_KERNEL(expm1,
CPU,
@@ -221,7 +223,9 @@ PD_REGISTER_KERNEL(expm1,
double,
int,
int64_t,
phi::dtype::float16) {}
phi::dtype::float16,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {}

PD_REGISTER_KERNEL(logit, CPU, ALL_LAYOUT, phi::LogitKernel, float, double) {}
PD_REGISTER_KERNEL(
95 changes: 95 additions & 0 deletions paddle/phi/kernels/funcs/activation_functor.h
@@ -1167,6 +1167,30 @@ struct ExpGradFunctor : public BaseActivationFunctor<T> {
}
};

template <typename T>
struct ExpGradFunctor<ComplexType<T>>
: public BaseActivationFunctor<ComplexType<T>> {
template <typename Device,
typename X,
typename Out,
typename dOut,
typename dX>
void operator()(Device d, X x UNUSED, Out out, dOut dout, dX dx) const {
dx.device(d) = dout * out.unaryExpr(Conj<T>());
}

static constexpr ActBwdOpFwdDeps FwdDeps() {
return ActBwdOpFwdDeps::kDepOut;
}
};

template <typename T>
struct Expm1 {
Contributor:

Why define Expm1 separately here? This file is meant to define functors, and this effectively defines the expm1 operation itself, which somewhat duplicates the functor below. If the goal is to define the complex expm1 operation, it would be better to put it in complex.h.

Contributor Author:

Hi, thrust does not provide an expm1 operator, and C++'s expm1 does not support complex types, so the complex expm1 implementation has to be composed from the complex exp implementation. I originally put it in complex.h, but @ScottWong98 previously suggested placing operations that C++ does not support in activation_functor.h.
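For reference, a minimal numpy sketch (not part of this diff) of that composition, using np.expm1 as the same reference the unit tests rely on:

import numpy as np

rng = np.random.default_rng(1024)
z = (rng.uniform(-1, 1, (10, 12)) + 1j * rng.uniform(-1, 1, (10, 12))).astype(np.complex64)

composed = np.exp(z) - 1        # what the complex Expm1 helper computes
reference = np.expm1(z)         # reference used by the tests
print(np.max(np.abs(composed - reference)))   # only small float32 rounding error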

Contributor:

OK, but are you sure an extra expm1 definition is needed? The non-complex expm1 below also just calls a function. If it really is needed, it can be defined in activation_functor.h, but it should be a specialization for the complex types only, not a template covering all types.

Contributor Author:

That was indeed an oversight on my part; only the complex specialization needs to be defined. Thanks for the suggestion!

Contributor Author:

This has been updated; please review it again @GGBond8488.

HOSTDEVICE T operator()(const T& val) const {
return exp(val) - static_cast<T>(1);
}
};

// expm1(x) = e^x - 1
template <typename T>
struct Expm1Functor : public BaseActivationFunctor<T> {
@@ -1178,6 +1202,15 @@ struct Expm1Functor : public BaseActivationFunctor<T> {
}
};

template <typename T>
struct Expm1Functor<ComplexType<T>>
: public BaseActivationFunctor<ComplexType<T>> {
template <typename Device, typename X, typename Out>
void operator()(Device d, X x, Out out) const {
out.device(d) = x.unaryExpr(Expm1<ComplexType<T>>()).eval();
}
};

template <typename T>
struct Expm1GradFunctor : public BaseActivationFunctor<T> {
template <typename Device,
@@ -1194,6 +1227,21 @@ struct Expm1GradFunctor : public BaseActivationFunctor<T> {
}
};

template <typename T>
struct Expm1GradFunctor<ComplexType<T>>
: public BaseActivationFunctor<ComplexType<T>> {
template <typename Device,
typename X,
typename Out,
typename dOut,
typename dX>
void operator()(Device d, X x UNUSED, Out out, dOut dout, dX dx) const {
dx.device(d) = dout * out.unaryExpr(Conj<T>()) + dout;
Contributor:

A question: why is the gradient of expm1 (exp - 1) here dout * (exp + 1)? :)

Contributor Author (Wanglongzhi2001) commented on Sep 1, 2023:

Honestly, I don't fully understand it either; TensorFlow's gradient implementation does not add this dout:
https://github.com/tensorflow/tensorflow/blob/f82986df65bea201e5aa466e6993504372132cec/tensorflow/python/ops/math_grad.py#L688-L695

However, Paddle's gradient implementations for the other data types do add this dout, and the gradient error check indeed fails if I leave it out, so I added it. I would also like to ask the Paddle maintainers whether this comes from a difference in how Paddle implements operator gradients.

Contributor:

The gradient of exp(x) - 1 is exp(x), and exp(x) = (exp(x) - 1) + 1 = out + 1.
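A quick numerical check of that identity (a standalone numpy sketch, not part of this diff); the complex functors above additionally apply Conj<T> to out, presumably following the usual conjugate (Wirtinger) convention for complex gradients:

import numpy as np

x = 0.37
out = np.expm1(x)                       # out = exp(x) - 1
h = 1e-6
finite_diff = (np.expm1(x + h) - np.expm1(x - h)) / (2 * h)
print(finite_diff)                      # approximately exp(x)
print(out + 1.0)                        # same value, hence dx = dout * out + dout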

}

static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepOut; }
};

// relu(x) = max(x, 0)
template <typename T>
struct ReluCPUFunctor : public BaseActivationFunctor<T> {
@@ -2790,6 +2838,16 @@ struct CudaExpFunctor<double> : public BaseActivationFunctor<double> {
}
};

template <typename T>
struct CudaExpFunctor<ComplexType<T>>
: public BaseActivationFunctor<ComplexType<T>> {
// exp(x) = exp(x)
__device__ __forceinline__ ComplexType<T> operator()(
const ComplexType<T> x) const {
return static_cast<ComplexType<T>>(exp(x));
}
};

template <typename T>
struct CudaSeluFunctor : public BaseActivationFunctor<T> {
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
@@ -2866,6 +2924,20 @@ struct CudaExpGradFunctor : public BaseActivationFunctor<T> {
}
};

template <typename T>
struct CudaExpGradFunctor<ComplexType<T>>
: public BaseActivationFunctor<ComplexType<T>> {
// dx = dout * exp(x)
__device__ __forceinline__ ComplexType<T> operator()(
const ComplexType<T> dout, const ComplexType<T> out) const {
return static_cast<ComplexType<T>>(dout * conj(out));
}

static constexpr ActBwdOpFwdDeps FwdDeps() {
return ActBwdOpFwdDeps::kDepOut;
}
};

template <typename T>
struct CudaReciprocalFunctor : public BaseActivationFunctor<T> {
using MPType = typename phi::dtype::MPTypeTrait<T>::Type;
@@ -2906,6 +2978,15 @@ struct CudaExpm1Functor<double> : public BaseActivationFunctor<double> {
}
};

template <typename T>
struct CudaExpm1Functor<ComplexType<T>>
: public BaseActivationFunctor<ComplexType<T>> {
__device__ __forceinline__ ComplexType<T> operator()(
const ComplexType<T> x) const {
return static_cast<ComplexType<T>>(Expm1<ComplexType<T>>()(x));
}
};

template <typename T>
struct CudaExpm1GradFunctor : public BaseActivationFunctor<T> {
// dx = dout * out
@@ -2918,6 +2999,20 @@ struct CudaExpm1GradFunctor : public BaseActivationFunctor<T> {
}
};

template <typename T>
struct CudaExpm1GradFunctor<ComplexType<T>>
: public BaseActivationFunctor<ComplexType<T>> {
// dx = dout * exp(x)
__device__ __forceinline__ ComplexType<T> operator()(
const ComplexType<T> dout, const ComplexType<T> out) const {
return static_cast<ComplexType<T>>(dout * conj(out) + dout);
}

static constexpr ActBwdOpFwdDeps FwdDeps() {
return ActBwdOpFwdDeps::kDepOut;
}
};

template <typename T>
struct CudaSinFunctor : public BaseActivationFunctor<T> {
using MPType = typename phi::dtype::MPTypeTrait<T>::Type;
8 changes: 6 additions & 2 deletions paddle/phi/kernels/gpu/activation_grad_kernel.cu
@@ -398,7 +398,9 @@ PD_REGISTER_KERNEL(exp_grad,
int,
int64_t,
phi::dtype::float16,
phi::dtype::bfloat16) {}
phi::dtype::bfloat16,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {}

PD_REGISTER_ACTIVATION_GRAD_KERNEL(softshrink_grad, SoftShrinkGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(hard_shrink_grad, HardShrinkGradKernel)
@@ -415,7 +417,9 @@ PD_REGISTER_KERNEL(expm1_grad,
float,
double,
phi::dtype::float16,
phi::dtype::bfloat16) {}
phi::dtype::bfloat16,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {}

PD_REGISTER_KERNEL(square_grad,
GPU,
10 changes: 8 additions & 2 deletions paddle/phi/kernels/gpu/activation_kernel.cu
@@ -261,7 +261,10 @@ PD_REGISTER_KERNEL(exp,
int,
int64_t,
phi::dtype::float16,
phi::dtype::bfloat16) {}
phi::dtype::bfloat16,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {}

PD_REGISTER_KERNEL(expm1,
GPU,
ALL_LAYOUT,
@@ -271,7 +274,10 @@ PD_REGISTER_KERNEL(expm1,
int,
int64_t,
phi::dtype::float16,
phi::dtype::bfloat16) {}
phi::dtype::bfloat16,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {}

PD_REGISTER_KERNEL(square,
GPU,
ALL_LAYOUT,
11 changes: 10 additions & 1 deletion python/paddle/tensor/ops.py
@@ -640,7 +640,16 @@ def expm1(x, name=None):
check_variable_and_dtype(
x,
'x',
['float16', 'uint16', 'float32', 'float64', 'int32', 'int64'],
[
Contributor:

Please update this method's docstring accordingly as well.

'float16',
'uint16',
'float32',
'float64',
'int32',
'int64',
'complex64',
'complex128',
],
'expm1',
)
helper = LayerHelper('expm1', **locals())
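As a side note on the dtype-check and docstring changes above, a hypothetical usage sketch of what the new complex support enables (the values are illustrative, not taken from this PR):

import numpy as np
import paddle

z = paddle.to_tensor(np.array([0.1 + 0.2j, -0.3 + 0.4j], dtype=np.complex64))
print(paddle.expm1(z))    # expected to match np.expm1 on the same values
print(paddle.exp(z))      # complex exp is registered by this PR as well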
57 changes: 54 additions & 3 deletions test/legacy_test/test_activation_op.py
@@ -149,9 +149,46 @@ def init_dtype(self):
self.dtype = np.float64


class TestExpPrim_ZeroDim(TestExpFp32_Prim):
class TestExp_Complex64(OpTest):
def setUp(self):
self.op_type = "exp"
self.python_api = paddle.exp
self.public_python_api = paddle.exp
self.init_dtype()
self.init_shape()
self.if_enable_cinn()
np.random.seed(1024)
x = (
np.random.uniform(-1, 1, self.shape)
+ 1j * np.random.uniform(-1, 1, self.shape)
).astype(self.dtype)
out = np.exp(x)
self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
self.outputs = {'Out': out}
self.convert_input_output()

def test_check_output(self):
self.check_output()

def test_check_grad(self):
self.check_grad(['X'], 'Out', max_relative_error=0.006)
Contributor:

Is max_relative_error needed here for both complex64 and complex128?

Contributor Author:

Yes.


def init_dtype(self):
self.dtype = np.complex64

def init_shape(self):
self.shape = []
self.shape = [10, 12]

def if_enable_cinn(self):
pass

def convert_input_output(self):
pass


class TestExp_Complex128(TestExp_Complex64):
def init_dtype(self):
self.dtype = np.complex128


class Test_Exp_Op_Fp16(unittest.TestCase):
@@ -189,9 +226,13 @@ def setUp(self):
self.python_api = paddle.expm1
self.init_dtype()
self.init_shape()

np.random.seed(2049)
x = np.random.uniform(0.1, 1, self.shape).astype(self.dtype)
if self.dtype == np.complex64 or self.dtype == np.complex128:
x = (
np.random.uniform(-1, 1, self.shape)
+ 1j * np.random.uniform(-1, 1, self.shape)
).astype(self.dtype)
out = np.expm1(x)

self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
@@ -205,6 +246,16 @@ def test_check_output(self):
self.check_output()


class TestExpm1_Complex64(TestExpm1):
def init_dtype(self):
self.dtype = np.complex64


class TestExpm1_Complex128(TestExpm1):
def init_dtype(self):
self.dtype = np.complex128


class TestExpm1_ZeroDim(TestExpm1):
def init_shape(self):
self.shape = []