From 0f0992667c51f4a9cdfed09be41d33f18900c928 Mon Sep 17 00:00:00 2001 From: Xin Huang Date: Fri, 15 May 2015 18:32:36 +0800 Subject: [PATCH 1/2] avx implementation for (s/d)log10 --- include/openvml.h | 3 ++ include/openvml_kernel.h | 8 ++- include/openvml_macros.h | 11 +++- include/openvml_reference.h | 3 ++ interface/CMakeLists.txt | 2 +- interface/log10.c | 39 ++++++++++++++ kernel/CMakeLists.txt | 2 +- kernel/aarch64/Kernel_generic.txt | 3 ++ kernel/arm/Kernel_generic.txt | 4 +- kernel/generic/Kernel_generic.txt | 3 ++ kernel/generic/log10_kernel.c | 40 ++++++++++++++ kernel/x86_64/Kernel_generic.txt | 2 + kernel/x86_64/Kernel_haswell.txt | 3 ++ kernel/x86_64/Kernel_sandybridge.txt | 3 ++ kernel/x86_64/dlog10_kernel_avx.c | 78 ++++++++++++++++++++++++++++ kernel/x86_64/slog10_kernel_avx.c | 63 ++++++++++++++++++++++ reference/CMakeLists.txt | 1 + reference/vlog10.c | 48 +++++++++++++++++ test/CMakeLists.txt | 1 + test/test_log10.c | 55 ++++++++++++++++++++ 20 files changed, 367 insertions(+), 5 deletions(-) create mode 100644 interface/log10.c create mode 100644 kernel/generic/log10_kernel.c create mode 100644 kernel/x86_64/dlog10_kernel_avx.c create mode 100644 kernel/x86_64/slog10_kernel_avx.c create mode 100644 reference/vlog10.c create mode 100644 test/test_log10.c diff --git a/include/openvml.h b/include/openvml.h index dc8f8e6..36cdba5 100644 --- a/include/openvml.h +++ b/include/openvml.h @@ -52,6 +52,9 @@ OPENVML_EXPORT void OpenVML_FUNCNAME(vdPow)(VML_INT n, const double * a, const d OPENVML_EXPORT void OpenVML_FUNCNAME(vsExp)(VML_INT n, const float * a, float * y); OPENVML_EXPORT void OpenVML_FUNCNAME(vdExp)(VML_INT n, const double * a, double * y); +OPENVML_EXPORT void OpenVML_FUNCNAME(vsLog10)(VML_INT n, const float * a, float * y); +OPENVML_EXPORT void OpenVML_FUNCNAME(vdLog10)(VML_INT n, const double * a, double * y); + OPENVML_EXPORT void OpenVML_FUNCNAME(vsTanh)(VML_INT n, const float * a, float * y); OPENVML_EXPORT void OpenVML_FUNCNAME(vdTanh)(VML_INT n, const double * a, double * y); diff --git a/include/openvml_kernel.h b/include/openvml_kernel.h index a397185..edfbf5f 100644 --- a/include/openvml_kernel.h +++ b/include/openvml_kernel.h @@ -54,9 +54,15 @@ void OpenVML_FUNCNAME(cexp_k)(VMLLONG n, float * a, float * b, float * y, float void OpenVML_FUNCNAME(zexp_k)(VMLLONG n, double * a, double * b, double * y, double * z, double * other_params); +void OpenVML_FUNCNAME(slog10_k)(VMLLONG n, float * a, float * b, float * y, float * z, float * other_params); +void OpenVML_FUNCNAME(dlog10_k)(VMLLONG n, double * a, double * b, double * y, double * z, double * other_params); +void OpenVML_FUNCNAME(clog10_k)(VMLLONG n, float * a, float * b, float * y, float * z, float * other_params); +void OpenVML_FUNCNAME(zlog10_k)(VMLLONG n, double * a, double * b, double * y, double * z, double * other_params); + + void OpenVML_FUNCNAME(stanh_k)(VMLLONG n, float * a, float * b, float * y, float * z, float * other_params); void OpenVML_FUNCNAME(dtanh_k)(VMLLONG n, double * a, double * b, double * y, double * z, double * other_params); void OpenVML_FUNCNAME(ctanh_k)(VMLLONG n, float * a, float * b, float * y, float * z, float * other_params); void OpenVML_FUNCNAME(ztanh_k)(VMLLONG n, double * a, double * b, double * y, double * z, double * other_params); -#endif \ No newline at end of file +#endif diff --git a/include/openvml_macros.h b/include/openvml_macros.h index 7c42a94..18a4134 100644 --- a/include/openvml_macros.h +++ b/include/openvml_macros.h @@ -53,6 +53,11 @@ #define CEXP_K OpenVML_FUNCNAME(cexp_k) #define ZEXP_K OpenVML_FUNCNAME(zexp_k) +#define SLOG10_K OpenVML_FUNCNAME(slog10_k) +#define DLOG10_K OpenVML_FUNCNAME(dlog10_k) +#define CLOG10_K OpenVML_FUNCNAME(clog10_k) +#define ZLOG10_K OpenVML_FUNCNAME(zlog10_k) + #define STANH_K OpenVML_FUNCNAME(stanh_k) #define DTANH_K OpenVML_FUNCNAME(dtanh_k) @@ -66,12 +71,14 @@ #define SUB_K SSUB_K #define POW_K SPOW_K #define EXP_K SEXP_K +#define LOG10_K SLOG10_K #define TANH_K STANH_K #else #define ADD_K DADD_K #define SUB_K DSUB_K #define POW_K DPOW_K #define EXP_K DEXP_K +#define LOG10_K DLOG10_K #define TANH_K DTANH_K #endif #else @@ -80,14 +87,16 @@ #define SUB_K CSUB_K #define POW_K CPOW_K #define EXP_K CEXP_K +#define LOG10_K CLOG10_K #define TANH_K CTANH_K #else #define ADD_K ZADD_K #define SUB_K ZSUB_K #define POW_K ZPOW_K #define EXP_K ZEXP_K +#define LOG10_K ZLOG10_K #define TANH_K ZTANH_K #endif #endif -#endif \ No newline at end of file +#endif diff --git a/include/openvml_reference.h b/include/openvml_reference.h index bc8f4cf..43badf5 100644 --- a/include/openvml_reference.h +++ b/include/openvml_reference.h @@ -52,6 +52,9 @@ OPENVML_EXPORT void OpenVML_FUNCNAME_REF(vdPow)(VML_INT n, const double * a, con OPENVML_EXPORT void OpenVML_FUNCNAME_REF(vsExp)(VML_INT n, const float * a, float * y); OPENVML_EXPORT void OpenVML_FUNCNAME_REF(vdExp)(VML_INT n, const double * a, double * y); +OPENVML_EXPORT void OpenVML_FUNCNAME_REF(vsLog10)(VML_INT n, const float * a, float * y); +OPENVML_EXPORT void OpenVML_FUNCNAME_REF(vdLog10)(VML_INT n, const double * a, double * y); + OPENVML_EXPORT void OpenVML_FUNCNAME_REF(vsTanh)(VML_INT n, const float * a, float * y); OPENVML_EXPORT void OpenVML_FUNCNAME_REF(vdTanh)(VML_INT n, const double * a, double * y); diff --git a/interface/CMakeLists.txt b/interface/CMakeLists.txt index c3e457a..9ab004f 100644 --- a/interface/CMakeLists.txt +++ b/interface/CMakeLists.txt @@ -5,7 +5,7 @@ set(OpenVML_LIBSRC_D "") set(OpenVML_LIBSRC_C "") set(OpenVML_LIBSRC_Z "") -set(REAL_INTERFACE_LIST add sub pow exp tanh) +set(REAL_INTERFACE_LIST add sub pow exp tanh log10) set(COMPLEX_INTERFACE_LIST add sub) function(cap_string var_name var_name_cap) diff --git a/interface/log10.c b/interface/log10.c new file mode 100644 index 0000000..5ba69eb --- /dev/null +++ b/interface/log10.c @@ -0,0 +1,39 @@ +/* * Copyright (c) 2014, 2015 Zhang Xianyi + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, + * are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright notice, this + * list of conditions and the following disclaimer in the documentation and/or + * other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include + + +void CNAME(VML_INT n, const VML_FLOAT * a, VML_FLOAT * y) { + + if (n<=0) return; + if (a==NULL || y==NULL) return; + + + EXEC_VML(0, LOG10_K, n, (VML_FLOAT*)a, NULL, y, NULL, NULL); + +} diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt index 2ddfd75..e9d7f68 100644 --- a/kernel/CMakeLists.txt +++ b/kernel/CMakeLists.txt @@ -6,7 +6,7 @@ set(OpenVML_LIBSRC_D "") set(OpenVML_LIBSRC_C "") set(OpenVML_LIBSRC_Z "") -set(KERNEL_LIST add sub pow exp tanh) #s,d +set(KERNEL_LIST add sub pow exp tanh log10) #s,d set(Z_KERNEL_LIST add sub) #c,z ######## s,d kernels diff --git a/kernel/aarch64/Kernel_generic.txt b/kernel/aarch64/Kernel_generic.txt index 12b75a3..5645f5a 100644 --- a/kernel/aarch64/Kernel_generic.txt +++ b/kernel/aarch64/Kernel_generic.txt @@ -14,6 +14,9 @@ set(pow_D_KERNEL_SOURCE generic/pow_kernel.c) set(exp_S_KERNEL_SOURCE generic/exp_kernel.c) set(exp_D_KERNEL_SOURCE generic/exp_kernel.c) +set(log10_S_KERNEL_SOURCE generic/log10_kernel.c) +set(log10_D_KERNEL_SOURCE generic/log10_kernel.c) + set(tanh_S_KERNEL_SOURCE generic/tanh_kernel.c) set(tanh_D_KERNEL_SOURCE generic/tanh_kernel.c) diff --git a/kernel/arm/Kernel_generic.txt b/kernel/arm/Kernel_generic.txt index 12b75a3..23507ef 100644 --- a/kernel/arm/Kernel_generic.txt +++ b/kernel/arm/Kernel_generic.txt @@ -14,6 +14,8 @@ set(pow_D_KERNEL_SOURCE generic/pow_kernel.c) set(exp_S_KERNEL_SOURCE generic/exp_kernel.c) set(exp_D_KERNEL_SOURCE generic/exp_kernel.c) +set(exp_S_KERNEL_SOURCE generic/log10_kernel.c) +set(exp_D_KERNEL_SOURCE generic/log10_kernel.c) + set(tanh_S_KERNEL_SOURCE generic/tanh_kernel.c) set(tanh_D_KERNEL_SOURCE generic/tanh_kernel.c) - diff --git a/kernel/generic/Kernel_generic.txt b/kernel/generic/Kernel_generic.txt index 1fc1e91..acb2a49 100644 --- a/kernel/generic/Kernel_generic.txt +++ b/kernel/generic/Kernel_generic.txt @@ -16,6 +16,9 @@ set(pow_D_KERNEL_SOURCE ${OpenVML_ARCH}/pow_kernel.c) set(exp_S_KERNEL_SOURCE ${OpenVML_ARCH}/exp_kernel.c) set(exp_D_KERNEL_SOURCE ${OpenVML_ARCH}/exp_kernel.c) +set(log10_S_KERNEL_SOURCE ${OpenVML_ARCH}/log10_kernel.c) +set(log10_D_KERNEL_SOURCE ${OpenVML_ARCH}/log10_kernel.c) + set(tanh_S_KERNEL_SOURCE ${OpenVML_ARCH}/tanh_kernel.c) set(tanh_D_KERNEL_SOURCE ${OpenVML_ARCH}/tanh_kernel.c) diff --git a/kernel/generic/log10_kernel.c b/kernel/generic/log10_kernel.c new file mode 100644 index 0000000..cdaa1e9 --- /dev/null +++ b/kernel/generic/log10_kernel.c @@ -0,0 +1,40 @@ +/* * Copyright (c) 2014, 2015 Zhang Xianyi + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, + * are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright notice, this + * list of conditions and the following disclaimer in the documentation and/or + * other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include "openvml_kernel.h" + +#ifndef DOUBLE +#define LOG10 log10f +#else +#define LOG10 log10 +#endif + +void KERNEL_NAME(VMLLONG n, VML_FLOAT * a, VML_FLOAT * b, VML_FLOAT * y, VML_FLOAT * z, VML_FLOAT * other_params) { + VMLLONG i=0; + for(i=0; i +#include "openvml_kernel.h" + +void KERNEL_NAME(VMLLONG n, VML_FLOAT * a, VML_FLOAT * b, VML_FLOAT * y, VML_FLOAT * z, VML_FLOAT * other_params) { + VMLLONG loop_count=(COMPSIZE*n) >> 5; + VMLLONG remain_count=(COMPSIZE*n) & 0x1f; + + int i=0; + + while(loop_count>0){ + + __m256d av0=_mm256_loadu_pd(a); + __m256d av1=_mm256_loadu_pd(a+4); + __m256d av2=_mm256_loadu_pd(a+8); + __m256d av3=_mm256_loadu_pd(a+12); + + __m256d av4=_mm256_loadu_pd(a+16); + __m256d av5=_mm256_loadu_pd(a+20); + __m256d av6=_mm256_loadu_pd(a+24); + __m256d av7=_mm256_loadu_pd(a+28); + + + + + __m256d yv0=_mm256_log10_pd(av0); + __m256d yv1=_mm256_log10_pd(av1); + __m256d yv2=_mm256_log10_pd(av2); + __m256d yv3=_mm256_log10_pd(av3); + + __m256d yv4=_mm256_log10_pd(av4); + __m256d yv5=_mm256_log10_pd(av5); + __m256d yv6=_mm256_log10_pd(av6); + __m256d yv7=_mm256_log10_pd(av7); + + _mm256_storeu_pd(y, yv0); + _mm256_storeu_pd(y+4, yv1); + _mm256_storeu_pd(y+8, yv2); + _mm256_storeu_pd(y+12, yv3); + + _mm256_storeu_pd(y+16, yv4); + _mm256_storeu_pd(y+20, yv5); + _mm256_storeu_pd(y+24, yv6); + _mm256_storeu_pd(y+28, yv7); + + a+=32; + y+=32; + loop_count--; + } + + for(i=0; i +#include "openvml_kernel.h" + +void KERNEL_NAME(VMLLONG n, VML_FLOAT * a, VML_FLOAT * b, VML_FLOAT * y, VML_FLOAT * z, VML_FLOAT * other_params) { + void KERNEL_NAME(VMLLONG n, VML_FLOAT * a, VML_FLOAT * b, VML_FLOAT * y, VML_FLOAT * z, VML_FLOAT * other_params) { + VMLLONG loop_count=(COMPSIZE*n) >> 5; + VMLLONG remain_count=(COMPSIZE*n) & 0x1f; + + int i=0; + + while(loop_count>0){ + + __m256 av0=_mm256_loadu_ps(a); + __m256 av1=_mm256_loadu_ps(a+8); + __m256 av2=_mm256_loadu_ps(a+16); + __m256 av3=_mm256_loadu_ps(a+24); + + + __m256 yv0=_mm256_log10_ps(av0); + __m256 yv1=_mm256_log10_ps(av1); + __m256 yv2=_mm256_log10_ps(av2); + __m256 yv3=_mm256_log10_ps(av3); + + + _mm256_storeu_ps(y, yv0); + _mm256_storeu_ps(y+8, yv1); + _mm256_storeu_ps(y+16, yv2); + _mm256_storeu_ps(y+24, yv3); + + a+=32; + y+=32; + loop_count--; + } + + for(i=0; i +#include +#include + +void OpenVML_FUNCNAME_REF(vsLog10)(VML_INT n, const float * a, float * y){ + VML_INT i; + if (n<=0) return; + if (a==NULL || y==NULL) return; + + for(i=0; i +#include +#include + +static char* funcname[4]={"vsLog10", "vdLog10", NULL,NULL}; +static double flop_per_elem[4]={0.0, 0.0, 0.0, 0.0}; + +static a_y_func_t ref_vLog10[] = { + (a_y_func_t)OpenVML_FUNCNAME_REF(vsLog10), + (a_y_func_t)OpenVML_FUNCNAME_REF(vdLog10), + NULL, + NULL, +}; + +static a_y_func_t test_vLog10[] = { + (a_y_func_t)OpenVML_FUNCNAME(vsLog10), + (a_y_func_t)OpenVML_FUNCNAME(vdLog10), + NULL, + NULL, +}; + + +CTEST2(check_result_s, log10){ + run_test_a_y(data->parameter, funcname, test_vLog10, ref_vLog10, flop_per_elem); +} + +CTEST2(check_result_d, log10){ + run_test_a_y(data->parameter, funcname, test_vLog10, ref_vLog10, flop_per_elem); +} From 25a76515d6d0cf78dc46f22e333682765d34532d Mon Sep 17 00:00:00 2001 From: Xin Huang Date: Fri, 15 May 2015 19:55:07 +0800 Subject: [PATCH 2/2] fix configuration for arm/Kernel_generic.txt --- kernel/arm/Kernel_generic.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/arm/Kernel_generic.txt b/kernel/arm/Kernel_generic.txt index 23507ef..355b610 100644 --- a/kernel/arm/Kernel_generic.txt +++ b/kernel/arm/Kernel_generic.txt @@ -14,8 +14,8 @@ set(pow_D_KERNEL_SOURCE generic/pow_kernel.c) set(exp_S_KERNEL_SOURCE generic/exp_kernel.c) set(exp_D_KERNEL_SOURCE generic/exp_kernel.c) -set(exp_S_KERNEL_SOURCE generic/log10_kernel.c) -set(exp_D_KERNEL_SOURCE generic/log10_kernel.c) +set(log10_S_KERNEL_SOURCE generic/log10_kernel.c) +set(log10_D_KERNEL_SOURCE generic/log10_kernel.c) set(tanh_S_KERNEL_SOURCE generic/tanh_kernel.c) set(tanh_D_KERNEL_SOURCE generic/tanh_kernel.c)