diff --git a/code/arm/Makefile b/code/arm/Makefile index ac54564..65561ba 100644 --- a/code/arm/Makefile +++ b/code/arm/Makefile @@ -1,4 +1,4 @@ -all: gemm_cblas gemm_armcl gemm conv sparse +all: gemm_armpl gemm_oblas gemm_armcl gemm conv sparse BIN_DIR?=bin EIGEN_PATH?=/usr/local/eigen @@ -14,11 +14,20 @@ GEMMLOWP_PATH=$(GEMMLOWP_INCLUDE_PATH)/../ ARMPL_PATH?=/usr/local/armpl/ ARMPL_INCLUDE_PATH=$(ARMPL_PATH)/include/ ARMPL_LIB_PATH=$(ARMPL_PATH)/lib/ + +OPENBLAS_PATH?=/usr/local/openblas/ +OPENBLAS_INCLUDE_PATH=$(OPENBLAS_PATH)/include/ +OPENBLAS_LIB_PATH=$(OPENBLAS_PATH)/lib/ + KERNELS_DIR=../kernels/ -gemm_cblas: cblas_bench.cpp +gemm_armpl: cblas_bench.cpp + $(MKDIR) $(BIN_DIR) + g++ -O3 -fopenmp --std=c++11 -I $(KERNELS_DIR) -I $(ARMPL_INCLUDE_PATH) -L $(ARMPL_LIB_PATH) -DUSE_ARMPL -o bin/gemm_armpl_bench $< -larmpl_mp -lgfortran + +gemm_oblas: cblas_bench.cpp $(MKDIR) $(BIN_DIR) - g++-7 -O3 -fopenmp --std=c++11 -I $(KERNELS_DIR) -I $(ARMPL_INCLUDE_PATH) -L $(ARMPL_LIB_PATH) -o bin/gemm_cblas_bench $< -larmpl -lgfortran + g++ -O3 -fopenmp --std=c++11 -I $(KERNELS_DIR) -I $(OPENBLAS_INCLUDE_PATH) -L $(OPENBLAS_LIB_PATH) -DUSE_OPENBLAS -o bin/gemm_oblas_bench $< -lopenblas gemm_armcl: armcl_bench.cpp $(MKDIR) $(BIN_DIR) diff --git a/code/arm/cblas_bench.cpp b/code/arm/cblas_bench.cpp index 3e2d03e..0d71a7a 100644 --- a/code/arm/cblas_bench.cpp +++ b/code/arm/cblas_bench.cpp @@ -31,7 +31,12 @@ #include #include +#ifdef USE_ARMPL #include +#elif defined(USE_OPENBLAS) +#include +#endif + #include "gemm_problems.h" #define FIX_LD(x) x @@ -77,7 +82,7 @@ int main(int argc, char *argv[]) B_TYPE *B; C_TYPE *C, co = 0; float alpha = 1.0, beta = 1.0; - double flops, total_flops = 0., st_time, end_time, ave_time, total_time = 0.; + double flops, total_flops = 0., ave_time, total_time = 0.; // DEFAULT settings int REPEAT = 10; // Default matrix test size if we are doing a single test @@ -239,17 +244,17 @@ int main(int argc, char *argv[]) alpha, A, p_gemm_params[i].lda, B, p_gemm_params[i].ldb, beta, C, p_gemm_params[i].ldc); // Start measurment - st_time = dsecnd_(); + auto st_time = std::chrono::steady_clock::now(); for (j = 0; j < REPEAT; ++j) { cblas_sgemm(CblasColMajor, p_gemm_params[i].transa, p_gemm_params[i].transb, p_gemm_params[i].m, p_gemm_params[i].n, p_gemm_params[i].k, alpha, A, p_gemm_params[i].lda, B, p_gemm_params[i].ldb, beta, C, p_gemm_params[i].ldc); } - end_time = dsecnd_(); + auto end_time = std::chrono::steady_clock::now(); flops = 2.*p_gemm_params[i].m*p_gemm_params[i].n*p_gemm_params[i].k; total_flops += flops; - ave_time = 1E6*(end_time - st_time)/REPEAT; + ave_time = std::chrono::duration(end_time - st_time).count() / REPEAT; total_time += ave_time; printf("SGEMM,%s,%s,%d,%d,%d,%.1f,%.5f\n",