Skip to content

Commit

Permalink
Merge pull request OpenMathLib#4976 from martin-frbg/m3m_exprec
Browse files (browse the repository at this point in the history)
[WIP] Add better workaround for GEMM3M on GENERIC and re-enable EXPRECISION for x86/x86_64 targets
  • Loading branch information
martin-frbg authored Dec 30, 2024
2 parents 73527aa + c125866 commit 718fb73
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 11 deletions.
18 changes: 8 additions & 10 deletions Makefile.system
Original file line number Diff line number Diff line change
Expand Up @@ -446,7 +446,7 @@ endif

ifeq ($(OSNAME), Linux)
EXTRALIB += -lm
NO_EXPRECISION = 1
#NO_EXPRECISION = 1
endif

ifeq ($(OSNAME), Android)
Expand Down Expand Up @@ -572,7 +572,7 @@ NO_BINARY_MODE = 1
endif

ifeq ($(CORE), generic)
NO_EXPRECISION = 1
#NO_EXPRECISION = 1
endif

ifndef NO_EXPRECISION
Expand All @@ -595,7 +595,7 @@ endif
ifeq ($(ARCH), x86_64)

ifeq ($(CORE), generic)
NO_EXPRECISION = 1
#NO_EXPRECISION = 1
endif

ifndef NO_EXPRECISION
Expand Down Expand Up @@ -828,8 +828,8 @@ BINARY_DEFINED = 1

ifeq ($(F_COMPILER), GFORTRAN)
ifeq ($(C_COMPILER), GCC)
# EXPRECISION = 1
# CCOMMON_OPT += -DEXPRECISION
EXPRECISION = 1
CCOMMON_OPT += -DEXPRECISION
endif
endif
endif
Expand Down Expand Up @@ -1392,17 +1392,15 @@ endif
endif

ifeq ($(F_COMPILER), CRAY)
CCOMMON_OPT += -DF_INTERFACE_CRAYFC
CCOMMON_OPT += -DF_INTERFACE_INTEL
FCOMMON_OPT += -hnopattern
ifdef INTERFACE64
ifneq ($(INTERFACE64), 0)
FCOMMON_OPT += -s integer64
endif
endif
ifeq ($(USE_OPENMP), 1)
FCOMMON_OPT += -fopenmp
else
FCOMMON_OPT += -fno-openmp
ifneq ($(USE_OPENMP), 1)
FCOMMON_OPT += -O noomp
endif
endif

Expand Down
2 changes: 1 addition & 1 deletion interface/gemm.c
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@
#endif

static int (*gemm[])(blas_arg_t *, BLASLONG *, BLASLONG *, IFLOAT *, IFLOAT *, BLASLONG) = {
#ifndef GEMM3M
#if !defined(GEMM3M) || defined(GENERIC)
GEMM_NN, GEMM_TN, GEMM_RN, GEMM_CN,
GEMM_NT, GEMM_TT, GEMM_RT, GEMM_CT,
GEMM_NR, GEMM_TR, GEMM_RR, GEMM_CR,
Expand Down
14 changes: 14 additions & 0 deletions param.h
Original file line number Diff line number Diff line change
Expand Up @@ -4033,6 +4033,8 @@ Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy rout
#define CGEMM_DEFAULT_UNROLL_N 2
#define ZGEMM_DEFAULT_UNROLL_N 2
#define XGEMM_DEFAULT_UNROLL_N 1
#define CGEMM3M_DEFAULT_UNROLL_N 2
#define ZGEMM3M_DEFAULT_UNROLL_N 2

#ifdef ARCH_X86
#define SGEMM_DEFAULT_UNROLL_M 2
Expand All @@ -4048,6 +4050,18 @@ Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy rout
#define CGEMM_DEFAULT_UNROLL_M 2
#define ZGEMM_DEFAULT_UNROLL_M 2
#define XGEMM_DEFAULT_UNROLL_M 1
#define CGEMM3M_DEFAULT_UNROLL_M 2
#define ZGEMM3M_DEFAULT_UNROLL_M 2
#define CGEMM3M_DEFAULT_P 448
#define ZGEMM3M_DEFAULT_P 224
#define XGEMM3M_DEFAULT_P 112
#define CGEMM3M_DEFAULT_Q 224
#define ZGEMM3M_DEFAULT_Q 224
#define XGEMM3M_DEFAULT_Q 224
#define CGEMM3M_DEFAULT_R 12288
#define ZGEMM3M_DEFAULT_R 12288
#define XGEMM3M_DEFAULT_R 12288

#endif

#ifdef ARCH_MIPS
Expand Down

0 comments on commit 718fb73

Please sign in to comment.