diff --git a/.travis.yml b/.travis.yml index 95a63f411..afbc1af47 100644 --- a/.travis.yml +++ b/.travis.yml @@ -14,15 +14,17 @@ matrix: - ubuntu-toolchain-r-test packages: - g++-4.9 + - binutils - gfortran-4.9 - libgmp-dev - libgmpxx4ldbl - libtool - autoconf - libopenblas-dev + - os: linux sudo: required - dist: xenial + dist: bionic compiler: gcc env: GCC_VERSION=7 addons: @@ -31,30 +33,33 @@ matrix: - ubuntu-toolchain-r-test packages: - g++-7 + - binutils - gfortran-7 - libgmp-dev - libgmpxx4ldbl - libtool - autoconf - libopenblas-dev + - os: linux sudo: required - dist: xenial + dist: bionic compiler: gcc env: GCC_VERSION=8 - # BLAS_LIBS='--with-blas-libs="-latlas -lcblas"' addons: apt: sources: - ubuntu-toolchain-r-test packages: - g++-8 + - binutils - gfortran-8 - libgmp-dev - libgmpxx4ldbl - libtool - autoconf - libopenblas-dev + # - os: linux # sudo: required # dist: trusty @@ -94,7 +99,7 @@ before_script: - git clone --depth 1 https://github.com/linbox-team/givaro.git && cd givaro && ./autogen.sh && make && sudo make install && cd .. - git clone --depth=1 https://github.com/xianyi/OpenBLAS.git && cd OpenBLAS && make && sudo make PREFIX="/usr/local" install && cd .. 
- export LD_LIBRARY_PATH=/usr/local/lib - - ./autogen.sh #$BLAS_LIBS + - ./autogen.sh script: - make diff --git a/autotune/Makefile.am b/autotune/Makefile.am index 7c2606442..367b24c4d 100644 --- a/autotune/Makefile.am +++ b/autotune/Makefile.am @@ -20,16 +20,9 @@ # ========LICENCE======== #/ -AM_CXXFLAGS = @DEFAULT_CFLAGS@ -I$(top_srcdir) - -# Forcing compilation without the precompiled libraries for the autotuning only -# if FFLASFFPACK_PRECOMPILED -# AM_LDFLAGS = $(CBLAS_LIBS) $(GIVARO_LIBS) $(CUDA_LIBS) $(PARLIBS) \ -# $(top_builddir)/fflas-ffpack/interfaces/libs/libfflas.la \ -# $(top_builddir)/fflas-ffpack/interfaces/libs/libffpack.la -# else -AM_LDFLAGS = $(CBLAS_LIBS) $(GIVARO_LIBS) $(CUDA_LIBS) $(PARLIBS) -#endif +AM_CXXFLAGS = $(FFLASFFPACK_CXXFLAGS) $(GIVARO_CFLAGS) $(BLAS_CFLAGS) $(PARFLAGS) +AM_CPPFLAGS = -I$(top_srcdir) +LDADD = $(GIVARO_LIBS) $(BLAS_LIBS) $(PARLIBS) AUTOTUNE_FGEMM = winograd-modular-float winograd-modular-double winograd-modularbalanced-float winograd-modularbalanced-double AUTOTUNE_PLUQ = pluq diff --git a/benchmarks/Makefile.am b/benchmarks/Makefile.am index 161626145..f87d0a747 100755 --- a/benchmarks/Makefile.am +++ b/benchmarks/Makefile.am @@ -23,14 +23,12 @@ SUBDIRS = benchmarks: $(BENCHMARKS) -AM_CXXFLAGS = @DEFAULT_CFLAGS@ -I$(top_srcdir) $(PRECOMPILE_FLAGS) - +AM_CXXFLAGS = $(FFLASFFPACK_CXXFLAGS) $(GIVARO_CFLAGS) $(BLAS_CFLAGS) $(PARFLAGS) $(PRECOMPILE_FLAGS) +AM_CPPFLAGS = -I$(top_srcdir) +LDADD = $(GIVARO_LIBS) $(BLAS_LIBS) $(PARLIBS) if FFLASFFPACK_PRECOMPILED -LDADD = $(CBLAS_LIBS) $(GIVARO_LIBS) $(CUDA_LIBS) $(PARLIBS) \ - $(top_builddir)/fflas-ffpack/interfaces/libs/libfflas.la \ - $(top_builddir)/fflas-ffpack/interfaces/libs/libffpack.la -else -LDADD = $(CBLAS_LIBS) $(GIVARO_LIBS) $(CUDA_LIBS) $(PARLIBS) +LDADD += $(top_builddir)/fflas-ffpack/interfaces/libs/libfflas.la +LDADD += $(top_builddir)/fflas-ffpack/interfaces/libs/libffpack.la endif PERFPUBLISHERFILE=benchmarks-report.xml diff --git a/configure.ac 
b/configure.ac index d09d30144..f6bdb005e 100644 --- a/configure.ac +++ b/configure.ac @@ -37,177 +37,82 @@ AC_CANONICAL_TARGET AM_INIT_AUTOMAKE([1.10 -Wall -Wno-portability foreign]) AX_PREFIX_CONFIG_H(fflas-ffpack/config.h, __FFLASFFPACK) -AC_LANG([C++]) - -echo "-----------------------------------------------" - -AC_DEBUG -AC_PROFILE -AC_WARNINGS - -echo "-----------------------------------------------" - -# CFLAGS=${CFLAGS:-$DEFAULT_CFLAGS} -# CXXFLAGS=${CXXFLAGS:-$DEFAULT_CXXFLAGS} - -###################################################### -# Try and pass different flags according to compiler # -###################################################### - - -# disable default -g -O2 CXXFLAGS +# Set CXXFLAGS to an empty string if not defined, and save it. We must do it +# before calling AC_PROG_CXX that sets it to '-O2 -g' if not defined : ${CXXFLAGS=""} +ORIGINAL_CXXFLAGS="${CXXFLAGS}" -#set CXX +# We set the language to C++ +AC_LANG([C++]) AC_PROG_CXX -AC_COMPILER_NAME - -# We need a C++11 compiler now - AB 2014-12-12 -# clang-3.8 does not support __float128 without explicitly passing it -std=c++11 -if test "x${CCNAM}" = "xclang38" ; then - AX_CXX_COMPILE_STDCXX_11([noext],[mandatory]) -else - AX_CXX_COMPILE_STDCXX_11([ext],[mandatory]) -fi -# appending CXX11FLAGS for the remaining checks, to avoid clang __float128 undefined bug -# duplicates will be removed later on -CXXFLAGS="$CXXFLAGS $CXX11FLAGS" - -REQUIRED_FLAGS="$CXX11FLAGS" - - -DEBUG_CFLAGS="-g" -DEFAULT_CFLAGS="" -WARN_CFLAGS="-Wall" - -#TODO use -fast for icc, -ipa for eko... 
-if test "x$USE_DEBUG" = "xyes" ; then - DEFAULT_CFLAGS="-O0 ${DEFAULT_CFLAGS} ${DEBUG_CFLAGS}" #those are CXXFLAGS -else - DEFAULT_CFLAGS="-O2 ${DEFAULT_CFLAGS}" -fi - -if test "x$PROF" = "xyes" ; then - DEFAULT_CFLAGS="${DEFAULT_CFLAGS} -pg" -fi - -if test "x$WARN" = "xyes" -o "x$WARN" = "xfull" ; then - case x${CCNAM} in - xicc) - WARN_CFLAGS="${WARN_CFLAGS} -Wcheck" - WARN_CFLAGS="${WARN_CFLAGS} -Wall -Wno-unused-parameter -Wuninitialized -Wconversion -Wcast-qual -pedantic -Wshadow -Wpointer-arith -Wwrite-strings -Wno-long-long" - WARN_CFLAGS="${WARN_CFLAGS} -Wextra -ansi" - ;; - xeko) - WARN_CFLAGS="${WARN_CFLAGS} -Wno-unused-parameter" - ;; - xgcc|xgcc44) - WARN_CFLAGS="${WARN_CFLAGS} -Wextra -Wno-unused-parameter" - if test "x${WARN}" = "xfull" ; then - WARN_CFLAGS="${WARN_CFLAGS} -Wuninitialized -Wconversion -Wcast-qual -pedantic -Wshadow -Wpointer-arith -Wcast-align -Wwrite-strings -Wno-long-long -Wno-variadic-macros -Wno-vla" - fi - if test "x${HAVE_CXX11}" = "x0" ; then - WARN_CFLAGS="${WARN_CFLAGS} -ansi" - fi - ;; - xgcc48) - WARN_CFLAGS="${WARN_CFLAGS} -Wextra -Wno-unused-parameter" - if test "x${WARN}" = "xfull" ; then - WARN_CFLAGS="${WARN_CFLAGS} -Wuninitialized -Wconversion -Wcast-qual -pedantic -Wshadow -Wpointer-arith -Wcast-align -Wwrite-strings -Wno-long-long -Wno-variadic-macros -Wno-vla" - # WARN_CFLAGS="${WARN_CFLAGS} -fsanitize=address" - fi - if test "x${HAVE_CXX11}" = "x0" ; then - WARN_CFLAGS="${WARN_CFLAGS} -ansi" - fi - - ;; - - xclang) - WARN_CFLAGS="${WARN_CFLAGS} -Wextra -Wno-unused-parameter" - if test "x${WARN}" = "xfull" ; then - WARN_CFLAGS="${WARN_CFLAGS} -Wuninitialized -Wconversion -Wcast-qual -pedantic -Wshadow -Wpointer-arith -Wcast-align -Wwrite-strings -Wno-long-long -Wno-vla-extension -Wno-variadic-macros" - WARN_CFLAGS="${WARN_CFLAGS} -D__STRICT_ANSI__" - fi - ;; - xclang38) - WARN_CFLAGS="${WARN_CFLAGS} -Wextra -Wno-unused-parameter" - if test "x${WARN}" = "xfull" ; then - WARN_CFLAGS="${WARN_CFLAGS} -Wuninitialized 
-Wconversion -Wcast-qual -pedantic -Wshadow -Wpointer-arith -Wcast-align -Wwrite-strings -Wno-long-long -g -Wno-vla-extension -Wno-variadic-macros" - WARN_CFLAGS="${WARN_CFLAGS} -D__STRICT_ANSI__" - # WARN_CFLAGS="${WARN_CFLAGS} -fsanitize=address" - fi - ;; - - *) - echo - echo "*******************************************************" - echo "unsupported compiler ($CCNAM). Please file a bug." - echo "*******************************************************" - echo - WARN_CFLAGS="${WARN_CFLAGS}" - esac -fi - - -DEFAULT_CFLAGS="${DEFAULT_CFLAGS} ${WARN_CFLAGS} ${DEBUG_CFLAGS}" -AC_SUBST([DEFAULT_CFLAGS]) +# +AM_PROG_AR -AC_HEADER_STDC +# Libtool AC_PROG_LIBTOOL -AC_PROG_EGREP -AC_PROG_SED -# newer libtool... LT_PREREQ([2.4.3]) LT_INIT +# Look for headers +AC_HEADER_STDC +AC_CHECK_HEADERS([float.h limits.h stddef.h stdlib.h string.h sys/time.h stdint.h pthread.h]) -echo "DEFAULT_CFLAGS=${DEFAULT_CFLAGS}" -echo "DEBUG_CFLAGS=${DEBUG_CFLAGS}" -echo "TESTS_CFLAGS=${TESTS_CFLAGS}" -echo "-----------------------------------------------" -echo " START FFLAS-FFPACK CONFIG " -echo "-----------------------------------------------" +# +AC_PROG_EGREP +AC_PROG_SED -FF_CHECK_OMP +################################################# +AS_BOX([ START FFLAS-FFPACK CONFIG ]) +################################################# +AC_COMPILER_NAME -# Checks which SIMD optimization flags to use and set SIMD_CFLAGS accordingly -INSTR_SET -AC_SUBST(SIMD_FLAGS) +# We need a C++11 compiler now - AB 2014-12-12 +# clang-3.8 does not support __float128 without explicitly passing it -std=c++11 +AS_IF([test "x${CCNAM}" = "xclang38"], + [AX_CXX_COMPILE_STDCXX_11([noext],[mandatory])], + [AX_CXX_COMPILE_STDCXX_11([ext],[mandatory])] + ) +REQUIRED_FLAGS="${CXX11FLAGS}" +# Add the c++11 flags for the configure compilations as clang needs it to work properly with a glibc++ compiled with gcc +CXXFLAGS="${CXX11FLAGS} ${CXXFLAGS}" -dnl gcc-4.9.2 bug See https://trac.sagemath.org/ticket/17635#comment:178 
-AS_IF([ test "x$CCNAM" = "xgcc492" ],[REQUIRED_FLAGS="${REQUIRED_FLAGS} -fpermissive"],[]) +AS_ECHO([---------------------------------------]) -dnl With GCC's default ABI version, a __m128 or __m256 are the same types and therefore we cannot -dnl have overloads for both types without linking error. -AS_IF([test "x$CCNAM" = "xgcc"],[REQUIRED_FLAGS="${REQUIRED_FLAGS} -fabi-version=6"],[]) +# Set OPTIM_FLAGS, DEBUG_FLAGS depending on compiler and command line arguments +SET_FLAGS -AC_SUBST(REQUIRED_FLAGS) +# Append -march=native to OPTIM_FLAGS if not present in CXXFLAGS and +# not cross-compiling and --no-marchnative is not set +ARCH_FLAGS -PARFLAGS="${OMPFLAGS}" -PARLIBS="${OMPFLAGS}" +# Append -mfpmath=sse to OPTIM_FLAGS on i386 and i686 architecture with SSE +FPMATH_FLAGS -AC_SUBST(PARFLAGS) -AC_SUBST(PARLIBS) +# With GCC 4.x, the default ABI version is 2. With this version, __m128 and +# __m256 are the same types and therefore we cannot have overloads for both +# types without linking error. It is fixed in ABI version 4. +# FIXME: Why do we set ABI version to 6 +AS_CASE([$CCNAM], [gcc4*], [REQUIRED_FLAGS="${REQUIRED_FLAGS} -fabi-version=6"]) -echo "-----------------------------------------------" +dnl gcc-4.9.2 bug See https://trac.sagemath.org/ticket/17635#comment:178 +AS_CASE([$CCNAM], [gcc492], [REQUIRED_FLAGS="${REQUIRED_FLAGS} -fpermissive"]) +AS_ECHO([---------------------------------------]) # Machine characteristics -AC_CHECK_SIZEOF(char, 8) -AC_CHECK_SIZEOF(short, 16) -AC_CHECK_SIZEOF(int, 32) -AC_CHECK_SIZEOF(long, 32) -AC_CHECK_SIZEOF(long long, 64) -AC_CHECK_SIZEOF(__int64, 64) +# Size of some types +AC_CHECK_SIZEOF(char) +AC_CHECK_SIZEOF(short) +AC_CHECK_SIZEOF(int) +AC_CHECK_SIZEOF(long) +AC_CHECK_SIZEOF(long long) +AC_CHECK_SIZEOF(__int64_t) +# Looking for int128 AC_CHECK_TYPE([__int128_t], [AC_DEFINE(HAVE_INT128, 1, [Define that compiler allows int128_t types])]) -# Checks for header files. 
-AC_HEADER_STDC -AC_CHECK_HEADERS([float.h limits.h stddef.h stdlib.h string.h sys/time.h stdint.h pthread.h]) - # check endianness of the architecture AC_C_BIGENDIAN( @@ -215,41 +120,26 @@ AC_C_BIGENDIAN( [AC_DEFINE(HAVE_LITTLE_ENDIAN, 1, [Define that architecture uses little endian storage])], []) -# Create some useful data types of fixed, known lengths +AS_ECHO([---------------------------------------]) + +# Looking for OpenMP +FF_CHECK_OMP +PARFLAGS="${OMPFLAGS}" +PARLIBS="${OMPFLAGS}" +AC_SUBST(PARFLAGS) +AC_SUBST(PARLIBS) -# AC_DEFINE_UNQUOTED(INT8, $LINBOX_INT8, Canonical 8-bit data type) -# AC_DEFINE_UNQUOTED(INT16, $LINBOX_INT16, Canonical 16-bit data type) -# AC_DEFINE_UNQUOTED(INT32, $LINBOX_INT32, Canonical 32-bit data type) -# AC_DEFINE_UNQUOTED(INT64, $LINBOX_INT64, Canonical 64-bit data type) -echo "-----------------------------------------------" # Feature checks FF_MISC -AC_LANG([C++]) - - -echo "-----------------------------------------------" - -# Getting GMP from Givaro - AB 2014-12-12 -#FF_CHECK_GMP +# Looking for Givaro. We get the flags for GMP at the same time PKG_CHECK_MODULES([GIVARO],[givaro >= 4.1.1]) FF_CHECK_GIVARO_USABILITY() - -dnl FF_CHECK_GIVARO(,,[ -dnl echo '*******************************************************************************' -dnl echo ' WARNING: GIVARO not found!' -dnl echo -dnl echo ' GIVARO library is required for some tests in this library.' -dnl echo ' Please make sure GIVARO is installed and specify its location with the' -dnl echo ' option --with-givaro= when running configure.' -dnl echo ' Do not forget to set/export LD_LIBRARY_PATH if necessary.' 
-dnl echo '*******************************************************************************' -dnl exit 1 -dnl ]) +AS_ECHO([---------------------------------------]) BLAS_FOUND=false @@ -261,62 +151,24 @@ FF_CHECK_USER_LAPACK FF_OPENBLAS_NUM_THREADS -# FF_CHECK_BLAS - -# FF_CHECK_GOTOBLAS - -# FF_CHECK_GSL - -# if test "$BLAS_FOUND" = "false" ; then - # FF_CHECK_CBLAS -# fi - -# if test "$BLAS_FOUND" = "false" ; then - # FF_CHECK_OTHERBLAS -# fi - -# FF_CHECK_LAPACK - -# if test "$BLAS_FOUND" = "false" ; then - # FF_CHECK_BLAS2 -# fi - - - -# BLAS_LIBS="${BLAS_LIBS}" -# BLAS_LIBS="-L/${BLAS_PATH} ${LAPACK_LIBS} ${BLAS_LIBS}" -# AC_SUBST(BLAS_LIBS) - # FF_CHECK_CUDA -# AM_CONDITIONAL(FFLASFFPACK_HAVE_BLAS, test "x$BLAS_FOUND" != "xfalse") - - -# FF_BENCH - +AS_ECHO([---------------------------------------]) FF_DOC +FF_PRECOMPILE +AS_ECHO([---------------------------------------]) -# if test ! -d ./benchmarks/data ; then - # echo "Creating data dir in benchmark" ; - # mkdir ./benchmarks/data ; -# fi - -CXXFLAGS="${CXXFLAGS} ${REQUIRED_FLAGS}" -CXXFLAGS="${CXXFLAGS} ${SIMD_CFLAGS}" -CXXFLAGS="${CXXFLAGS} ${GIVARO_CFLAGS}" -CXXFLAGS="${CXXFLAGS} ${BLAS_CFLAGS}" -CXXFLAGS="${CXXFLAGS} ${CUDA_CFLAGS}" -CXXFLAGS="${CXXFLAGS} ${PARFLAGS}" - -AC_SUBST(CXXFLAGS) - -FF_PRECOMPILE +CXXFLAGS="${ORIGINAL_CXXFLAGS}" +FFLASFFPACK_CXXFLAGS="${REQUIRED_FLAGS} ${OPTIM_FLAGS} ${DEBUG_FLAGS}" +AC_SUBST(FFLASFFPACK_CXXFLAGS) +AS_ECHO(["FFLASFFPACK_CXXFLAGS = ${FFLASFFPACK_CXXFLAGS}"]) +AC_SUBST(REQUIRED_FLAGS) -echo "-----------------------------------------------" -echo " END FFLAS-FFPACK CONFIG " -echo "-----------------------------------------------" +################################################# +AS_BOX([ END FFLAS-FFPACK CONFIG ]) +################################################# AC_CONFIG_FILES([ Makefile diff --git a/examples/Makefile.am b/examples/Makefile.am index be060d7a0..762f15593 100644 --- a/examples/Makefile.am +++ b/examples/Makefile.am @@ -23,13 +23,12 @@ SUBDIRS = 
examples: $(EXAMPLES) -AM_CXXFLAGS = @DEFAULT_CFLAGS@ -I$(top_srcdir) $(PRECOMPILE_FLAGS) +AM_CXXFLAGS = $(FFLASFFPACK_CXXFLAGS) $(GIVARO_CFLAGS) $(BLAS_CFLAGS) $(PARFLAGS) $(PRECOMPILE_FLAGS) +AM_CPPFLAGS = -I$(top_srcdir) +LDADD = $(GIVARO_LIBS) $(BLAS_LIBS) $(PARLIBS) if FFLASFFPACK_PRECOMPILED -LDADD = $(CBLAS_LIBS) $(GIVARO_LIBS) $(CUDA_LIBS) $(PARLIBS) \ - $(top_builddir)/fflas-ffpack/interfaces/libs/libfflas.la \ - $(top_builddir)/fflas-ffpack/interfaces/libs/libffpack.la -else -LDADD = $(CBLAS_LIBS) $(GIVARO_LIBS) $(CUDA_LIBS) $(PARLIBS) +LDADD += $(top_builddir)/fflas-ffpack/interfaces/libs/libfflas.la +LDADD += $(top_builddir)/fflas-ffpack/interfaces/libs/libffpack.la endif EXAMPLES = \ diff --git a/fflas-ffpack-config.in b/fflas-ffpack-config.in index f1dac22cd..3e2f53f42 100644 --- a/fflas-ffpack-config.in +++ b/fflas-ffpack-config.in @@ -51,7 +51,6 @@ Known values for OPTION are: --blas-libs print BLAS library linking information --cflags print pre-processor and compiler flags --blas-cflags print BLAS pre-processor and BLAS compiler flags - --blas-home print where BLAS were found --help display this help and exit --version output version information string (eg @VERSION@) --decimal-version output version in decimal representation (base 100, that is $decvr) @@ -99,23 +98,19 @@ while test $# -gt 0; do ;; --cflags) - echo -I${includedir} @BLAS_CFLAGS@ @CXXFLAGS@ @GIVARO_CFLAGS@ @PRECOMPILE_FLAGS@ @PARFLAGS@ # @CUDA_CFLAGS@ + echo -I${includedir} @BLAS_CFLAGS@ @PARFLAGS@ @PRECOMPILE_FLAGS@ @REQUIRED_FLAGS@ @GIVARO_CFLAGS@ ;; --blas-cflags) - echo -I${includedir} @BLAS_CFLAG@ @AVXFLAGS@ # @PARFLAGS@ # @CUDA_CFLAGS@ + echo -I${includedir} @BLAS_CFLAGS@ ;; --libs) - echo @PARLIBS@ @PRECOMPILE_LIBS@ @BLAS_LIBS@ @GIVARO_LIBS@ # @CUDA_LIBS@ + echo @PARLIBS@ @PRECOMPILE_LIBS@ @BLAS_LIBS@ @GIVARO_LIBS@ ;; --blas-libs) - echo @BLAS_LIBS@ - ;; - - --blas-home) - echo @BLAS_PATH@ + echo @BLAS_LIBS@ ;; *) diff --git a/fflas-ffpack/interfaces/libs/Makefile.am 
b/fflas-ffpack/interfaces/libs/Makefile.am index 8c567461c..12c9abf3b 100644 --- a/fflas-ffpack/interfaces/libs/Makefile.am +++ b/fflas-ffpack/interfaces/libs/Makefile.am @@ -22,9 +22,9 @@ if FFLASFFPACK_PRECOMPILED pkgincludesubdir=$(pkgincludedir)/interfaces/libs -AM_CXXFLAGS = @DEFAULT_CFLAGS@ -AM_CPPFLAGS = -I$(top_srcdir) #/fflas-ffpack/utils/ -I$(top_srcdir)/fflas-ffpack/fflas/ -I$(top_srcdir)/fflas-ffpack/ffpack -I$(top_srcdir)/fflas-ffpack/field -LDADD = $(CBLAS_LIBS) $(GIVARO_LIBS) $(CUDA_LIBS) $(PARFLAGS) +AM_CXXFLAGS = $(FFLASFFPACK_CXXFLAGS) $(GIVARO_CFLAGS) $(BLAS_CFLAGS) $(PARFLAGS) +AM_CPPFLAGS = -I$(top_srcdir) +LDADD = $(GIVARO_LIBS) $(BLAS_LIBS) $(PARLIBS) #AM_LDFLAGS=-static diff --git a/macros/CodeChunk/Makefile.am b/macros/CodeChunk/Makefile.am index 4fd53cb9a..6cfa09d12 100644 --- a/macros/CodeChunk/Makefile.am +++ b/macros/CodeChunk/Makefile.am @@ -27,8 +27,5 @@ EXTRA_DIST= \ lapack.C \ cblas.C \ fblas.C \ - cuda.C \ - instrset.h \ - instrset_detect.cpp\ - gmp.C + cuda.C diff --git a/macros/CodeChunk/avx.C b/macros/CodeChunk/avx.C deleted file mode 100644 index 2c8bb490d..000000000 --- a/macros/CodeChunk/avx.C +++ /dev/null @@ -1,11 +0,0 @@ -#include -int main() { - __m256d P ; - double p = 0; - P = _mm256_set1_pd(p); - P = _mm256_add_pd(P,P); -#ifdef __try_avx2 - P = _mm256_fnmadd_pd(P,P,P); -#endif - return 0; -} diff --git a/macros/CodeChunk/givaro.C b/macros/CodeChunk/givaro.C deleted file mode 100644 index 1b027168a..000000000 --- a/macros/CodeChunk/givaro.C +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright (C) 2013 FFLAS-FFPACK group. - * - * Extirpé form a m4 macro by Brice Boyer (briceboyer) . - * - * - * ========LICENCE======== - * This file is part of the library FFLAS-FFPACK. 
- * - * FFLAS-FFPACK is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - * ========LICENCE======== - * - */ - -#include -int main () { - if (GIVARO_VERSION < $version_min || GIVARO_VERSION >= $version_max || GIVARO_VERSION>0x030000) - return -1; - else - return 0; /* old version of Givaro are defined as hexa 0x03yyzz*/ -} - diff --git a/macros/CodeChunk/instrset.h b/macros/CodeChunk/instrset.h deleted file mode 100644 index 8204fff29..000000000 --- a/macros/CodeChunk/instrset.h +++ /dev/null @@ -1,216 +0,0 @@ -/**************************** instrset.h ********************************** -* Author: Agner Fog -* Date created: 2012-05-30 -* Last modified: 2016-11-25 -* Version: 1.25 -* Project: vector classes -* Description: -* Header file for various compiler-specific tasks and other common tasks to -* vector class library: -* > selects the supported instruction set -* > defines integer types -* > defines compiler version macros -* > undefines certain macros that prevent function overloading -* > defines template class to represent compile-time integer constant -* > defines template for compile-time error messages -* -* (c) Copyright 2012-2016 GNU General Public License www.gnu.org/licenses -******************************************************************************/ - -#ifndef INSTRSET_H 
-#define INSTRSET_H 125 - -// Detect 64 bit mode -#if (defined(_M_AMD64) || defined(_M_X64) || defined(__amd64) ) && ! defined(__x86_64__) -#define __x86_64__ 1 // There are many different macros for this, decide on only one -#endif - -// Find instruction set from compiler macros if INSTRSET not defined -// Note: Most of these macros are not defined in Microsoft compilers -#ifndef INSTRSET -#if defined ( __AVX512F__ ) || defined ( __AVX512__ ) -#define INSTRSET 9 -#elif defined ( __AVX2__ ) -#define INSTRSET 8 -#elif defined ( __AVX__ ) -#define INSTRSET 7 -#elif defined ( __SSE4_2__ ) -#define INSTRSET 6 -#elif defined ( __SSE4_1__ ) -#define INSTRSET 5 -#elif defined ( __SSSE3__ ) -#define INSTRSET 4 -#elif defined ( __SSE3__ ) -#define INSTRSET 3 -#elif defined ( __SSE2__ ) || defined ( __x86_64__ ) -#define INSTRSET 2 -#elif defined ( __SSE__ ) -#define INSTRSET 1 -#elif defined ( _M_IX86_FP ) // Defined in MS compiler. 1: SSE, 2: SSE2 -#define INSTRSET _M_IX86_FP -#else -#define INSTRSET 0 -#endif // instruction set defines -#endif // INSTRSET - -// Include the appropriate header file for intrinsic functions -#if INSTRSET > 7 // AVX2 and later -#if defined (__GNUC__) && ! defined (__INTEL_COMPILER) -#include // x86intrin.h includes header files for whatever instruction - // sets are specified on the compiler command line, such as: - // xopintrin.h, fma4intrin.h -#else -#include // MS version of immintrin.h covers AVX, AVX2 and FMA3 -#endif // __GNUC__ -#elif INSTRSET == 7 -#include // AVX -#elif INSTRSET == 6 -#include // SSE4.2 -#elif INSTRSET == 5 -#include // SSE4.1 -#elif INSTRSET == 4 -#include // SSSE3 -#elif INSTRSET == 3 -#include // SSE3 -#elif INSTRSET == 2 -#include // SSE2 -#elif INSTRSET == 1 -#include // SSE -#endif // INSTRSET - -#if INSTRSET >= 8 && !defined(__FMA__) -// Assume that all processors that have AVX2 also have FMA3 -#if defined (__GNUC__) && ! defined (__INTEL_COMPILER) && ! 
defined (__clang__) -// Prevent error message in g++ when using FMA intrinsics with avx2: -#pragma message "It is recommended to specify also option -mfma when using -mavx2 or higher" -#else -#define __FMA__ 1 -#endif -#endif - -// AMD instruction sets -#if defined (__XOP__) || defined (__FMA4__) -#ifdef __GNUC__ -#include // AMD XOP (Gnu) -#else -#include // AMD XOP (Microsoft) -#endif // __GNUC__ -#elif defined (__SSE4A__) // AMD SSE4A -#include -#endif // __XOP__ - -// FMA3 instruction set -#if defined (__FMA__) && (defined(__GNUC__) || defined(__clang__)) && ! defined (__INTEL_COMPILER) -#include -#endif // __FMA__ - -// FMA4 instruction set -#if defined (__FMA4__) && (defined(__GNUC__) || defined(__clang__)) -#include // must have both x86intrin.h and fma4intrin.h, don't know why -#endif // __FMA4__ - - -// Define integer types with known size -#if defined(__GNUC__) || defined(__clang__) || (defined(_MSC_VER) && _MSC_VER >= 1600) - // Compilers supporting C99 or C++0x have stdint.h defining these integer types - #include -#elif defined(_MSC_VER) - // Older Microsoft compilers have their own definitions - typedef signed __int8 int8_t; - typedef unsigned __int8 uint8_t; - typedef signed __int16 int16_t; - typedef unsigned __int16 uint16_t; - typedef signed __int32 int32_t; - typedef unsigned __int32 uint32_t; - typedef signed __int64 int64_t; - typedef unsigned __int64 uint64_t; - #ifndef _INTPTR_T_DEFINED - #define _INTPTR_T_DEFINED - #ifdef __x86_64__ - typedef int64_t intptr_t; - #else - typedef int32_t intptr_t; - #endif - #endif -#else - // This works with most compilers - typedef signed char int8_t; - typedef unsigned char uint8_t; - typedef signed short int int16_t; - typedef unsigned short int uint16_t; - typedef signed int int32_t; - typedef unsigned int uint32_t; - typedef long long int64_t; - typedef unsigned long long uint64_t; - #ifdef __x86_64__ - typedef int64_t intptr_t; - #else - typedef int32_t intptr_t; - #endif -#endif - -#include // define 
abs(int) - -#ifdef _MSC_VER // Microsoft compiler or compatible Intel compiler -#include // define _BitScanReverse(int), __cpuid(int[4],int), _xgetbv(int) -#endif // _MSC_VER - -// functions in instrset_detect.cpp -#ifdef VCL_NAMESPACE -namespace VCL_NAMESPACE { -#endif - int instrset_detect(void); // tells which instruction sets are supported - bool hasFMA3(void); // true if FMA3 instructions supported - bool hasFMA4(void); // true if FMA4 instructions supported - bool hasXOP(void); // true if XOP instructions supported - bool hasAVX512ER(void); // true if AVX512ER instructions supported -#ifdef VCL_NAMESPACE -} -#endif - -// GCC version -#if defined(__GNUC__) && !defined (GCC_VERSION) && !defined (__clang__) -#define GCC_VERSION ((__GNUC__) * 10000 + (__GNUC_MINOR__) * 100 + (__GNUC_PATCHLEVEL__)) -#endif - -// Clang version -#if defined (__clang__) -#define CLANG_VERSION ((__clang_major__) * 10000 + (__clang_minor__) * 100 + (__clang_patchlevel__)) -// Problem: The version number is not consistent across platforms -// http://llvm.org/bugs/show_bug.cgi?id=12643 -// Apple bug 18746972 -#endif - -// Fix problem with non-overloadable macros named min and max in WinDef.h -#ifdef _MSC_VER -#if defined (_WINDEF_) && defined(min) && defined(max) -#undef min -#undef max -#endif -#ifndef NOMINMAX -#define NOMINMAX -#endif -#endif - -#ifdef VCL_NAMESPACE -namespace VCL_NAMESPACE { -#endif - // Template class to represent compile-time integer constant - template class Const_int_t {}; // represent compile-time signed integer constant - template class Const_uint_t {}; // represent compile-time unsigned integer constant - #define const_int(n) (Const_int_t ()) // n must be compile-time integer constant - #define const_uint(n) (Const_uint_t()) // n must be compile-time unsigned integer constant - - // Template for compile-time error messages - template class Static_error_check { - public: Static_error_check() {}; - }; - template <> class Static_error_check { // generate 
compile-time error if false - private: Static_error_check() {}; - }; -#ifdef VCL_NAMESPACE -} -#endif - - -#endif // INSTRSET_H diff --git a/macros/CodeChunk/instrset_detect.cpp b/macros/CodeChunk/instrset_detect.cpp deleted file mode 100644 index b6be412f2..000000000 --- a/macros/CodeChunk/instrset_detect.cpp +++ /dev/null @@ -1,186 +0,0 @@ -/************************** instrset_detect.cpp **************************** -* Author: Agner Fog -* Date created: 2012-05-30 -* Last modified: 2017-05-02 -* Version: 1.28 -* Project: vector classes -* Description: -* Functions for checking which instruction sets are supported. -* -* (c) Copyright 2012-2017 GNU General Public License http://www.gnu.org/licenses -\*****************************************************************************/ - -#include "instrset.h" - -#ifdef VCL_NAMESPACE -namespace VCL_NAMESPACE { -#endif - -// Define interface to cpuid instruction. -// input: eax = functionnumber, ecx = 0 -// output: eax = output[0], ebx = output[1], ecx = output[2], edx = output[3] -static inline void cpuid (int output[4], int functionnumber) { -#if defined(__GNUC__) || defined(__clang__) // use inline assembly, Gnu/AT&T syntax - - int a, b, c, d; - __asm("cpuid" : "=a"(a),"=b"(b),"=c"(c),"=d"(d) : "a"(functionnumber),"c"(0) : ); - output[0] = a; - output[1] = b; - output[2] = c; - output[3] = d; - -#elif defined (_MSC_VER) || defined (__INTEL_COMPILER) // Microsoft or Intel compiler, intrin.h included - - __cpuidex(output, functionnumber, 0); // intrinsic function for CPUID - -#else // unknown platform. 
try inline assembly with masm/intel syntax - - __asm { - mov eax, functionnumber - xor ecx, ecx - cpuid; - mov esi, output - mov [esi], eax - mov [esi+4], ebx - mov [esi+8], ecx - mov [esi+12], edx - } - -#endif -} - -// Define interface to xgetbv instruction -static inline int64_t xgetbv (int ctr) { -#if (defined (_MSC_FULL_VER) && _MSC_FULL_VER >= 160040000) || (defined (__INTEL_COMPILER) && __INTEL_COMPILER >= 1200) // Microsoft or Intel compiler supporting _xgetbv intrinsic - - return _xgetbv(ctr); // intrinsic function for XGETBV - -#elif defined(__GNUC__) // use inline assembly, Gnu/AT&T syntax - - uint32_t a, d; - __asm("xgetbv" : "=a"(a),"=d"(d) : "c"(ctr) : ); - return a | (uint64_t(d) << 32); - -#else // #elif defined (_WIN32) // other compiler. try inline assembly with masm/intel/MS syntax - - uint32_t a, d; - __asm { - mov ecx, ctr - _emit 0x0f - _emit 0x01 - _emit 0xd0 ; // xgetbv - mov a, eax - mov d, edx - } - return a | (uint64_t(d) << 32); - -#endif -} - - -/* find supported instruction set - return value: - 0 = 80386 instruction set - 1 or above = SSE (XMM) supported by CPU (not testing for O.S. 
support) - 2 or above = SSE2 - 3 or above = SSE3 - 4 or above = Supplementary SSE3 (SSSE3) - 5 or above = SSE4.1 - 6 or above = SSE4.2 - 7 or above = AVX supported by CPU and operating system - 8 or above = AVX2 - 9 or above = AVX512F - 10 or above = AVX512VL - 11 or above = AVX512BW, AVX512DQ -*/ -int instrset_detect(void) { - - static int iset = -1; // remember value for next call - if (iset >= 0) { - return iset; // called before - } - iset = 0; // default value - int abcd[4] = {0,0,0,0}; // cpuid results - cpuid(abcd, 0); // call cpuid function 0 - if (abcd[0] == 0) return iset; // no further cpuid function supported - cpuid(abcd, 1); // call cpuid function 1 for feature flags - if ((abcd[3] & (1 << 0)) == 0) return iset; // no floating point - if ((abcd[3] & (1 << 23)) == 0) return iset; // no MMX - if ((abcd[3] & (1 << 15)) == 0) return iset; // no conditional move - if ((abcd[3] & (1 << 24)) == 0) return iset; // no FXSAVE - if ((abcd[3] & (1 << 25)) == 0) return iset; // no SSE - iset = 1; // 1: SSE supported - if ((abcd[3] & (1 << 26)) == 0) return iset; // no SSE2 - iset = 2; // 2: SSE2 supported - if ((abcd[2] & (1 << 0)) == 0) return iset; // no SSE3 - iset = 3; // 3: SSE3 supported - if ((abcd[2] & (1 << 9)) == 0) return iset; // no SSSE3 - iset = 4; // 4: SSSE3 supported - if ((abcd[2] & (1 << 19)) == 0) return iset; // no SSE4.1 - iset = 5; // 5: SSE4.1 supported - if ((abcd[2] & (1 << 23)) == 0) return iset; // no POPCNT - if ((abcd[2] & (1 << 20)) == 0) return iset; // no SSE4.2 - iset = 6; // 6: SSE4.2 supported - if ((abcd[2] & (1 << 27)) == 0) return iset; // no OSXSAVE - if ((xgetbv(0) & 6) != 6) return iset; // AVX not enabled in O.S. 
- if ((abcd[2] & (1 << 28)) == 0) return iset; // no AVX - iset = 7; // 7: AVX supported - cpuid(abcd, 7); // call cpuid leaf 7 for feature flags - if ((abcd[1] & (1 << 5)) == 0) return iset; // no AVX2 - iset = 8; - if ((abcd[1] & (1 << 16)) == 0) return iset; // no AVX512 - cpuid(abcd, 0xD); // call cpuid leaf 0xD for feature flags - if ((abcd[0] & 0x60) != 0x60) return iset; // no AVX512 - iset = 9; - cpuid(abcd, 7); // call cpuid leaf 7 for feature flags - if ((abcd[1] & (1 << 31)) == 0) return iset; // no AVX512VL - iset = 10; - if ((abcd[1] & 0x40020000) != 0x40020000) return iset; // no AVX512BW, AVX512DQ - iset = 11; - return iset; -} - -// detect if CPU supports the FMA3 instruction set -bool hasFMA3(void) { - if (instrset_detect() < 7) return false; // must have AVX - int abcd[4]; // cpuid results - cpuid(abcd, 1); // call cpuid function 1 - return ((abcd[2] & (1 << 12)) != 0); // ecx bit 12 indicates FMA3 -} - -// detect if CPU supports the FMA4 instruction set -bool hasFMA4(void) { - if (instrset_detect() < 7) return false; // must have AVX - int abcd[4]; // cpuid results - cpuid(abcd, 0x80000001); // call cpuid function 0x80000001 - return ((abcd[2] & (1 << 16)) != 0); // ecx bit 16 indicates FMA4 -} - -// detect if CPU supports the XOP instruction set -bool hasXOP(void) { - if (instrset_detect() < 7) return false; // must have AVX - int abcd[4]; // cpuid results - cpuid(abcd, 0x80000001); // call cpuid function 0x80000001 - return ((abcd[2] & (1 << 11)) != 0); // ecx bit 11 indicates XOP -} - -// detect if CPU supports the F16C instruction set -bool hasF16C(void) { - if (instrset_detect() < 7) return false; // must have AVX - int abcd[4]; // cpuid results - cpuid(abcd, 1); // call cpuid function 1 - return ((abcd[2] & (1 << 29)) != 0); // ecx bit 29 indicates F16C -} - -// detect if CPU supports the AVX512ER instruction set -bool hasAVX512ER(void) { - if (instrset_detect() < 9) return false; // must have AVX512F - int abcd[4]; // cpuid results - 
cpuid(abcd, 7); // call cpuid function 7 - return ((abcd[1] & (1 << 27)) != 0); // ebx bit 27 indicates AVX512ER -} - - -#ifdef VCL_NAMESPACE -} -#endif diff --git a/macros/CodeChunk/sse.C b/macros/CodeChunk/sse.C deleted file mode 100644 index 0ace7240a..000000000 --- a/macros/CodeChunk/sse.C +++ /dev/null @@ -1,12 +0,0 @@ -#include - -int main() { - // SSE 2 - __m128d P ; - double p = 0; - P = _mm_set1_pd(p); - P = _mm_add_pd(P,P); - // SSE 4.1 - P = _mm_floor_pd(P); - return 0; -} diff --git a/macros/avx-check.m4 b/macros/avx-check.m4 deleted file mode 100644 index edbfd4250..000000000 --- a/macros/avx-check.m4 +++ /dev/null @@ -1,114 +0,0 @@ -dnl Check for AVX -dnl Copyright (c) 2011 FFLAS-FFPACK -dnl Created by BB, 2014-03-25 -dnl ========LICENCE======== -dnl This file is part of the library FFLAS-FFPACK. -dnl -dnl FFLAS-FFPACK is free software: you can redistribute it and/or modify -dnl it under the terms of the GNU Lesser General Public -dnl License as published by the Free Software Foundation; either -dnl version 2.1 of the License, or (at your option) any later version. -dnl -dnl This library is distributed in the hope that it will be useful, -dnl but WITHOUT ANY WARRANTY; without even the implied warranty of -dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -dnl Lesser General Public License for more details. 
-dnl -dnl You should have received a copy of the GNU Lesser General Public -dnl License along with this library; if not, write to the Free Software -dnl Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -dnl ========LICENCE======== -dnl - -dnl FF_CHECK_AVX -dnl -dnl turn on AVX or AVX2 extensions if available - -AC_DEFUN([FF_CHECK_AVX], -[ - AC_ARG_ENABLE(avx,[AC_HELP_STRING([--disable-avx], [ Disable Intel(r) AVX])]) - AC_MSG_CHECKING(for AVX) - AS_IF([ test "x$enable_avx" != "xno" ], - [ - BACKUP_CXXFLAGS=${CXXFLAGS} - CODE_AVX=`cat macros/CodeChunk/avx.C` - - dnl Check for AVX - dnl Intel compilers usually do not require option to enable avx - dnl Thus, we test with no option on - for switch_avxflags in "" "-mavx"; do - CXXFLAGS="${BACKUP_CXXFLAGS} -O0 ${switch_avxflags}" - AC_TRY_RUN([ ${CODE_AVX} ], - [ - avx_found="yes" - AVXFLAGS=${switch_avxflags} - break - ], - [ - avx_found="no" - ], - [ - echo "cross compiling...disabling" - avx_found="no" - break - ]) - done - - dnl Is AVX found? - AS_IF([ test "x$avx_found" = "xyes" ], - [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_AVX_INSTRUCTIONS,1,[Define if AVX is available]) - - dnl Check for AVX2 - AC_MSG_CHECKING(for AVX2) - - for switch_avx2flags in "" "-mfma -mavx2"; do - CXXFLAGS="${BACKUP_CXXFLAGS} -O0 ${switch_avx2flags}" - AC_TRY_RUN( - [ - #define __try_avx2 - ${CODE_AVX} - ], - [ - avx2_found="yes" - AVX2FLAGS=${switch_avx2flags} - break - ], - [ - avx2_found="no" - ], - [ - echo "cross compiling...disabling" - avx2_found = "no" - break - ]) - done - - dnl Is AVX2 found? 
- AS_IF([ test "x$avx2_found" = "xyes" ], - [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_AVX2_INSTRUCTIONS,1,[Define if AVX2 is available]) - AVXFLAGS=${AVX2FLAGS} - ], - [ - dnl No AVX2 - AC_MSG_RESULT(no) - ] - ) - ], - [ - dnl No AVX - AC_MSG_RESULT(no) - ] - ) - - CXXFLAGS=${BACKUP_CXXFLAGS} - ], - [ - dnl --disable-avx - AC_MSG_RESULT(no [disabled]) - ] - ) -]) diff --git a/macros/ax_check_x86_features.m4 b/macros/ax_check_x86_features.m4 deleted file mode 100644 index 22e030b5f..000000000 --- a/macros/ax_check_x86_features.m4 +++ /dev/null @@ -1,77 +0,0 @@ -# =========================================================================== -# http://www.gnu.org/software/autoconf-archive/ax_check_x86_features.html -# =========================================================================== -# -# SYNOPSIS -# -# AX_CHECK_X86_FEATURES([ACTION-IF-FOUND],[ACTION-IF-NOT-FOUND]) -# -# DESCRIPTION -# -# Checks if the host cpu supports various x86 instruction set, the -# instructions that will get tested are "mmx, popcnt, sse, sse2, sse3, -# sse4.1, sse4.2, sse4a, avx, avx2, avx512f, fma, fma4, bmi, bmi2". If the -# instruction set is supported by the host cpu, the C preprocessor macro -# HAVE_XXX_INSTRUCTIONS is set to 1. The XXX is up-cased instruction case -# with dot replaced by underscore. For example, the test for "sse4.2" -# would export HAVE_SSE4_2_INSTRUCTIONS=1. Also the compiler flag -# "-msse4.2" would be added to X86_FEATURE_CFLAGS variable, that can be -# obtained in Makefile.am using @X86_FEATURE_CFLAGS@. -# -# If any of the test for the instruction set were succeeded, the configure -# script would run ACTION-IF-FOUND if it is specified, or append -# X86_FEATURE_CFLAGS to CFLAGS. If none of the instruction were found, -# ACTION-IF-NOT-FOUND hook is triggered. -# -# This macro requires gcc extended builtin function "__builtin_cpu_init" -# and "__builtin_cpu_supports" to detect the cpu features. It will error -# out if the compiler doesn't has these builtins. 
-# -# See also AX_GCC_X86_CPU_SUPPORTS, which is the actual macro that perform -# the checks for the instruction sets. -# -# LICENSE -# -# Copyright (c) 2016 Felix Chern -# -# This program is free software; you can redistribute it and/or modify it -# under the terms of the GNU General Public License as published by the -# Free Software Foundation; either version 2 of the License, or (at your -# option) any later version. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General -# Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program. If not, see . -# -# As a special exception, the respective Autoconf Macro's copyright owner -# gives unlimited permission to copy, distribute and modify the configure -# scripts that are the output of Autoconf when processing the Macro. You -# need not follow the terms of the GNU General Public License when using -# or distributing such scripts, even though portions of the text of the -# Macro appear in them. The GNU General Public License (GPL) does govern -# all other use of the material that constitutes the Autoconf Macro. -# -# This special exception to the GPL applies to versions of the Autoconf -# Macro released by the Autoconf Archive. When you make and distribute a -# modified version of the Autoconf Macro, you may extend this special -# exception to the GPL to apply to your modified version as well. 
- -#serial 1 - -AC_DEFUN([AX_CHECK_X86_FEATURES], - [m4_foreach_w( - [ax_x86_feature], - [mmx popcnt sse sse2 sse3 sse4.1 sse4.2 sse4a avx avx2 avx512f fma fma4 bmi bmi2], - [AX_GCC_X86_CPU_SUPPORTS(ax_x86_feature, - [X86_FEATURE_CFLAGS="$X86_FEATURE_CFLAGS -m[]ax_x86_feature"], - []) - ]) - AC_SUBST([X86_FEATURE_CFLAGS]) - m4_ifval([$1],[$1], - [CXXFLAGS="$CXXFLAGS $X86_FEATURE_CFLAGS"]) - $2 -]) diff --git a/macros/ax_gcc_x86_cpu_supports.m4 b/macros/ax_gcc_x86_cpu_supports.m4 deleted file mode 100644 index a61a14adc..000000000 --- a/macros/ax_gcc_x86_cpu_supports.m4 +++ /dev/null @@ -1,104 +0,0 @@ -# =========================================================================== -# http://www.gnu.org/software/autoconf-archive/ax_gcc_x86_cpu_supports.html -# =========================================================================== -# -# SYNOPSIS -# -# AX_GCC_X86_CPU_SUPPORTS(X86-INSTRUCTION-SET, -# [ACTION-IF-FOUND],[ACTION-IF-NOT-FOUND]) -# -# DESCRIPTION -# -# Checks if the host cpu supports X86-INSTRUCTION-SET. The instruction set -# that can be tested are "mmx, popcnt, sse, sse2, sse3, sse4.1, sse4.2, -# sse4a, avx, avx2, avx512f, fma, fma4, bmi, bmi2". If the instruction set -# is supported by the host cpu, the C preprocessor macro -# HAVE_XXX_INSTRUCTIONS is set to 1. The XXX is up-cased instruction case -# with dot replaced by underscore. For example, the test for "sse4.2" -# would export HAVE_SSE4_2_INSTRUCTIONS=1. This macro requires gcc -# extended builtin function "__builtin_cpu_init" and -# "__builtin_cpu_supports" to detect the cpu features. It will error out -# if the compiler doesn't has these builtins. -# -# If the test for the instruction set succeeded, the hook ACTION-IF-FOUND -# would run. Otherwise the hook ACTION-IF-NOT-FOUND would run if -# specified. -# -# See also AX_CHECK_X86_FEATURES, which checks all the possible -# instruction set and export the corresponding CFLAGS. 
-# -# LICENSE -# -# Copyright (c) 2016 Felix Chern -# -# This program is free software; you can redistribute it and/or modify it -# under the terms of the GNU General Public License as published by the -# Free Software Foundation; either version 2 of the License, or (at your -# option) any later version. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General -# Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program. If not, see . -# -# As a special exception, the respective Autoconf Macro's copyright owner -# gives unlimited permission to copy, distribute and modify the configure -# scripts that are the output of Autoconf when processing the Macro. You -# need not follow the terms of the GNU General Public License when using -# or distributing such scripts, even though portions of the text of the -# Macro appear in them. The GNU General Public License (GPL) does govern -# all other use of the material that constitutes the Autoconf Macro. -# -# This special exception to the GPL applies to versions of the Autoconf -# Macro released by the Autoconf Archive. When you make and distribute a -# modified version of the Autoconf Macro, you may extend this special -# exception to the GPL to apply to your modified version as well. 
- -#serial 1 - -AC_DEFUN_ONCE([_AX_GCC_X86_CPU_INIT], - [AC_LANG_PUSH([C]) - AC_CACHE_CHECK([for gcc __builtin_cpu_init function], - [ax_cv_gcc_check_x86_cpu_init], - [AC_RUN_IFELSE( - [AC_LANG_PROGRAM([#include ], - [__builtin_cpu_init ();]) - ], - [ax_cv_gcc_check_x86_cpu_init=yes], - [ax_cv_gcc_check_x86_cpu_init=no])]) - AS_IF([test "X$ax_cv_gcc_check_x86_cpu_init" = "Xno"], - [AC_MSG_ERROR([Need GCC to support X86 CPU features tests])]) -]) - -AC_DEFUN([AX_GCC_X86_CPU_SUPPORTS], - [AC_REQUIRE([AC_PROG_CC]) - AC_REQUIRE([_AX_GCC_X86_CPU_INIT]) - AC_LANG_PUSH([C]) - AS_VAR_PUSHDEF([gcc_x86_feature], [AS_TR_SH([ax_cv_gcc_x86_cpu_supports_$1])]) - AC_CACHE_CHECK([for x86 $1 instruction support], - [gcc_x86_feature], - [AC_RUN_IFELSE( - [AC_LANG_PROGRAM( [#include ], - [ __builtin_cpu_init (); - if (__builtin_cpu_supports("$1")) - return 0; - return 1; - ])], - [gcc_x86_feature=yes], - [gcc_x86_feature=no] - )] - ) - AC_LANG_POP([C]) - AS_VAR_IF([gcc_x86_feature],[yes], - [AC_DEFINE( - AS_TR_CPP([HAVE_$1_INSTRUCTIONS]), - [1], - [Define if $1 instructions are supported]) - $2], - [$3] - ) - AS_VAR_POPDEF([gcc_x86_feature]) -]) diff --git a/macros/common.m4 b/macros/common.m4 new file mode 100644 index 000000000..f8f95a1a5 --- /dev/null +++ b/macros/common.m4 @@ -0,0 +1,125 @@ +dnl Copyright(c)'2019 FFLAS-FFPACK +dnl +dnl ========LICENCE======== +dnl This file is part of the library FFLAS-FFPACK. +dnl +dnl FFLAS-FFPACK is free software: you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public +dnl License as published by the Free Software Foundation; either +dnl version 2.1 of the License, or (at your option) any later version. +dnl +dnl This library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. 
+dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with this library; if not, write to the Free Software +dnl Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +dnl ========LICENCE======== +dnl/ + +dnl Set OPTIM_FLAGS, DEBUG_FLAGS depending on compiler (in CCNAM) and command +dnl line arguments of ./configure (--enable-debug, --enable-warnings and +dnl --enable-profile) +AC_DEFUN([SET_FLAGS],[ + dnl require AC_DEBUG so USE_DEBUG is defined + AC_REQUIRE([AC_DEBUG]) + dnl require AC_PROFILE so PROF is defined + AC_REQUIRE([AC_PROFILE]) + dnl require AC_WARNINGS so WARN is defined + AC_REQUIRE([AC_WARNINGS]) + dnl require AC_COMPILER_NAME so CCNAM is defined + AC_REQUIRE([AC_COMPILER_NAME]) + + # --enable-debug ? + # __FFLASFFPACK_DEBUG will be (un)set in fflas-ffpack/config.h + #TODO use -fast for icc, -ipa for eko... + AS_IF([test "x$USE_DEBUG" = "xyes"], + [OPTIM_FLAGS="-O0" + DEBUG_FLAGS="-Wall -g -UNDEBUG -DDEBUG"], + [OPTIM_FLAGS="-O2" + DEBUG_FLAGS="-Wall -DNDEBUG -UDEBUG"] + ) + + # --enable-profile ? + AS_IF([test "x$PROF" = "xyes"], [ DEBUG_FLAGS+=" -pg" ]) + + # --enable-warnings ? + AS_IF([test "x$WARN" = "xyes" -o "x$WARN" = "xfull"], + [AS_CASE([$CCNAM], + [eko], [], + [gcc*|icc*|clang*], [ DEBUG_FLAGS+=" -Wextra" ], + [AS_BOX([Unsupported compiler ($CCNAM). Please file a bug.],[*])] + ) + ]) + + AS_IF([test "x$WARN" = "xfull"], + [AS_CASE([$CCNAM], + [eko], + [], + [gcc*|icc*|clang*], + [ + DEBUG_FLAGS+=" -Wuninitialized -Wconversion -Wcast-qual " + DEBUG_FLAGS+=" -pedantic -Wshadow -Wpointer-arith " + DEBUG_FLAGS+=" -Wwrite-strings -Wno-long-long" + AS_CASE([$CCNAM], + [icc], + [ DEBUG_FLAGS+=" -Wcheck -ansi" ], + [gcc*], + [ + DEBUG_FLAGS+=" -Wno-vla" + DEBUG_FLAGS+=" -Wcast-align -Wno-variadic-macros" + ], + [clang*], + [ + DEBUG_FLAGS+=" -Wno-vla-extension -D__STRICT_ANSI__" + DEBUG_FLAGS+=" -Wcast-align -Wno-variadic-macros" + ]) + ], + [AS_BOX([Unsupported compiler ($CCNAM). 
Please file a bug.],[*])] + ) + ]) + ]) + + + +dnl Append -march=native to OPTIM_FLAGS if not present in CXXFLAGS and +dnl target==host and --no-marchnative is not set +AC_DEFUN([ARCH_FLAGS],[ + AC_ARG_WITH(archnative, [AC_HELP_STRING([--without-archnative], + [do not use -march=native (default is to use it if -march is not present in CXXFLAGS)])]) + + AS_CASE([$CXXFLAGS], + [*-march=*], [], # do nothing if already set in CXXFLAGS + [AS_IF([test "x${with_archnative}" == "xno"], + [], # do nothing if option is set to no + [AS_IF([test "${host}" != "${build}" -o "${host}" != "${target}"], + [AC_MSG_NOTICE("For efficiency you may want to add a '-march=...' flag in CXXFLAGS")], + [AC_MSG_NOTICE("Adding '-march=native' to OPTIM_FLAGS") + OPTIM_FLAGS+=" -march=native"])])]) + ]) + +dnl Append -mfpmath=sse to OPTIM_FLAGS on i386 and i686 architecture with SSE +AC_DEFUN([FPMATH_FLAGS],[ + AC_REQUIRE([ARCH_FLAGS]) + + BACKUP_CXXFLAGS="${CXXFLAGS}" + CXXFLAGS="${OPTIM_FLAGS} ${CXXFLAGS}" + AS_CASE([$target], + [*i386*|*i686*], + [AC_RUN_IFELSE([AC_LANG_PROGRAM([[]], [[#ifdef __SSE__ + return 0; + #else + return 1; + #endif + ]])], + [AC_MSG_NOTICE("Adding '-mfpmath=sse' to OPTIM_FLAGS") + OPTIM_FLAGS+=" -mfpmath=sse"], + [], # either the flag is not recognized by the compiler or + # SSE is not avail => do nothing + [AC_MSG_NOTICE("If available you may want to add + '-mfpmath=sse' to flags")])] # cross-compilation case + []) # not on i386 nor i686 => do nothing + CXXFLAGS="${BACKUP_CXXFLAGS}" + ]) diff --git a/macros/debug.m4 b/macros/debug.m4 index 5f2297526..4fa78347c 100644 --- a/macros/debug.m4 +++ b/macros/debug.m4 @@ -68,112 +68,96 @@ If full is given, we become paranoïd about warnings and treat them as errors.]) CCNAM="" AC_DEFUN([AC_COMPILER_NAME], [ - AC_MSG_CHECKING(for family name of compiler) - - dnl CHECKING for various compilers - dnl ICC ? 
- AC_TRY_RUN( [ - #ifdef __INTEL_COMPILER - int main() { return 0 ; } - #else - not intel - #endif], - [ AC_MSG_RESULT(icc) - CCNAM=icc - AC_SUBST(CCNAM) - ]) - -dnl PATHSCALE > 4 ? - AS_IF([ test -z "${CCNAM}"], [ - AC_TRY_RUN( [ - #ifdef __PATHSCALE__ - int main() { return !(__PATHCC__ >= 4) ; } - #else - not ekopath neither. - #endif], [ - AC_MSG_RESULT(eko) - CCNAM=eko - AC_SUBST(CCNAM) ]) - ]) - -dnl CLANG >= 3.9 ? - AS_IF([ test -z "${CCNAM}"], [ - AC_TRY_RUN( [ - #ifdef __clang__ - int main() { return !((__clang_major__ >= 4) ||(__clang_major__ ==3 && __clang_minor__ >= 9) ) ; } - #else - not clang3.9 - #endif], [ - CCNAM=clang - AC_SUBST(CCNAM) - AC_MSG_RESULT($CCNAM) ]) - ]) -dnl 3.1 < CLANG <= 3.8 ? - AS_IF([ test -z "${CCNAM}"], [ - AC_TRY_RUN( [ - #ifdef __clang__ - int main() { return !(__clang_major__ ==3 && __clang_minor__ >=1 && __clang_minor__ <=8) ; } - #else - not clang3.8 - #endif], [ - CCNAM=clang38 - AC_SUBST(CCNAM) - AC_MSG_RESULT($CCNAM) ]) - ]) - -dnl GCC >= 4.9.3 ? - AS_IF([ test -z "${CCNAM}"], [ - AC_TRY_RUN( [ - #ifdef __GNUC__ - int main() { return !(__GNUC__ >= 5 || (__GNUC__ == 4 && (__GNUC_MINOR__ > 9 || - (__GNUC_MINOR__ == 9 && __GNUC_PATCHLEVEL__ > 2)))) ; } - #else - not gcc neither. - #endif], [ - CCNAM=gcc - AC_SUBST(CCNAM) - AC_MSG_RESULT($CCNAM) - ]) - ]) - -dnl GCC == 4.9.2 ? - AS_IF([ test -z "${CCNAM}"], [ - AC_TRY_RUN( [ - #ifdef __GNUC__ - int main() { return !(__GNUC__ == 4 && __GNUC_MINOR__ == 9 && __GNUC_PATCHLEVEL__ ==2 ) ; } - #else - not gcc neither. - #endif], [ - CCNAM=gcc492 - AC_SUBST(CCNAM) - AC_MSG_RESULT($CCNAM) - ]) - ]) - - -dnl GCC >= 4.8 < 4.9.2 ? - AS_IF([ test -z "${CCNAM}"], [ - AC_TRY_RUN( [ - #ifdef __GNUC__ - int main() { return !(__GNUC__ == 4 && (__GNUC_MINOR__ == 8 || - (__GNUC_MINOR__ == 9 && __GNUC_PATCHLEVEL__ < 2))) ; } - #else - not gcc neither. - #endif], [ - CCNAM=gcc48 - AC_SUBST(CCNAM) - AC_MSG_RESULT($CCNAM) - ]) - ]) - dnl other ? 
- - AS_IF([ test -z "${CCNAM}"], - [ AC_MSG_RESULT(unknown) - CCNAM=unknown - AC_SUBST(CCNAM) - echo - echo " *** unknow compiler. please file a bug " - echo - ]) + AC_MSG_CHECKING(for family name of compiler) + + dnl CHECKING for various compilers + dnl ICC ? + AS_IF([ test -z "${CCNAM}"], [ + AC_TRY_RUN( [ + #ifdef __INTEL_COMPILER + int main() { return 0 ; } + #else + not intel + #endif], + [ CCNAM=icc ]) + ]) + + dnl PATHSCALE > 4 ? + AS_IF([ test -z "${CCNAM}"], [ + AC_TRY_RUN( [ + #ifdef __PATHSCALE__ + int main() { return !(__PATHCC__ >= 4) ; } + #else + not ekopath either. + #endif], + [ CCNAM=eko ]) + ]) + + dnl CLANG >= 3.9 ? + AS_IF([ test -z "${CCNAM}"], [ + AC_TRY_RUN( [ + #ifdef __clang__ + int main() { return !((__clang_major__ >= 4) || (__clang_major__ == 3 && __clang_minor__ >= 9)) ; } + #else + not clang3.9 + #endif], + [ CCNAM=clang ]) + ]) + + dnl 3.1 < CLANG <= 3.8 ? + AS_IF([ test -z "${CCNAM}"], [ + AC_TRY_RUN( [ + #ifdef __clang__ + int main() { return !(__clang_major__ == 3 && __clang_minor__ >= 1 && __clang_minor__ <= 8) ; } + #else + not clang3.8 + #endif], + [ CCNAM=clang38 ]) + ]) + + dnl GCC >= 5 ? + AS_IF([ test -z "${CCNAM}"], [ + AC_TRY_RUN( [ + #ifdef __GNUC__ + int main() { return !(__GNUC__ >= 5 ) ; } + #else + not gcc neither. + #endif], + [ CCNAM=gcc ]) + ]) + + dnl 4.3 <= GCC < 5 ? + AS_IF([ test -z "${CCNAM}"], [ + AC_TRY_RUN( [ + #ifdef __GNUC__ + int main() { return !(__GNUC__ == 4 && __GNUC_MINOR__ >= 3) ; } + #else + not gcc neither. + #endif], + [ CCNAM=gcc4 ]) + ]) + + dnl GCC == 4.9.2 ? + AS_IF([ test -z "${CCNAM}"], [ + AC_TRY_RUN( [ + #ifdef __GNUC__ + int main() { return !(__GNUC__ == 4 && __GNUC_MINOR__ == 9 && __GNUC_PATCHLEVEL__ == 2 ) ; } + #else + not gcc neither. + #endif], + [ CCNAM=gcc492 ]) + ]) + + dnl other ? + AS_IF([ test -z "${CCNAM}"], + [ + CCNAM=unknow + AC_MSG_RESULT($CCNAM) + AS_BOX([*** unknown compiler, please file a bug. 
***], [*]) + ], + [ + AC_MSG_RESULT($CCNAM) + ]) + + AC_SUBST(CCNAM) ]) - diff --git a/macros/fflas-ffpack-opt.m4 b/macros/fflas-ffpack-opt.m4 deleted file mode 100644 index bb54037d8..000000000 --- a/macros/fflas-ffpack-opt.m4 +++ /dev/null @@ -1,170 +0,0 @@ -dnl Copyright (c) 2012 FFLAS-FFPACK -dnl Written by Clément Pernet, Brice Boyer. -dnl This file was taken from LinBox linbox-opt.m4 -dnl ========LICENCE======== -dnl This file is part of the library FFLAS-FFPACK. -dnl -dnl FFLAS-FFPACK is free software: you can redistribute it and/or modify -dnl it under the terms of the GNU Lesser General Public -dnl License as published by the Free Software Foundation; either -dnl version 2.1 of the License, or (at your option) any later version. -dnl -dnl This library is distributed in the hope that it will be useful, -dnl but WITHOUT ANY WARRANTY; without even the implied warranty of -dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -dnl Lesser General Public License for more details. -dnl -dnl You should have received a copy of the GNU Lesser General Public -dnl License along with this library; if not, write to the Free Software -dnl Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -dnl ========LICENCE======== -dnl/ - - - - - - -AC_DEFUN([FF_OPT], -[ -AC_MSG_CHECKING([whether to use run time optimization]) - -AC_ARG_ENABLE(optimization, -[AC_HELP_STRING([--disable-optimization], [ Disable run time optimization in FflasFpack code])]) - -dnl creating the optimise file unconditionally - -echo "#ifndef __FFLASFFPACK_optimise_H" > fflas-ffpack/fflas-ffpack-optimise.h -echo "#define __FFLASFFPACK_optimise_H" >> fflas-ffpack/fflas-ffpack-optimise.h -echo "" >> fflas-ffpack/fflas-ffpack-optimise.h -dnl The optimise.h file has to be correcly written, so we close the #if ! 
-echo "#endif // optimise.h" >> fflas-ffpack/fflas-ffpack-optimise.h - -AS_IF([test "x$enable_optimization" == "xyes"], -[ -AC_MSG_RESULT(yes) - - -BACKUP_CXXFLAGS=${CXXFLAGS} -BACKUP_LIBS=${LIBS} - -echo " *** OPTIMIZATION *** " - -AC_MSG_CHECKING([best threshold for Strassen-Winograd matrix multiplication]) -AC_MSG_RESULT([see below]) - -CXXFLAGS_ALL="-I. -I.. -I`pwd` -I`pwd`/fflas-ffpack ${BACKUP_CXXFLAGS} ${AVXFLAGS} ${DEFAULT_CFLAGS} ${GIVARO_CFLAGS} ${CBLAS_FLAG} ${OMPFLAGS}" -LIBS="${BACKUP_LIBS} ${CBLAS_LIBS} ${GIVARO_LIBS}" -WINO=`cat optimiser/winograd.C` -ADDFLAGS="-DOPTIMISATION_MODE" -saved_LD_RUN_PATH="$LD_RUN_PATH" -LD_RUN_PATH="${LD_RUN_PATH:+$LD_RUN_PATH$PATH_SEPARATOR}$givaro_lib_path" -export LD_RUN_PATH -dnl for Wino threshold for double -echo " == Wino/BLAS threshold for Givaro::Modular == " -CXXFLAGS="${CXXFLAGS_ALL} -DFLTTYPE=Givaro::Modular ${ADDFLAGS}" -AC_RUN_IFELSE([AC_LANG_SOURCE([${WINO}])],[ - dnl remove last line - dnl sed -i '$d' fflas-ffpack/fflas-ffpack-optimise.h ; - dnl -i does not work on BSD sed - sed '$d' fflas-ffpack/fflas-ffpack-optimise.h > fflas-ffpack/fflas-ffpack-optimise.back.h ; - mv fflas-ffpack/fflas-ffpack-optimise.back.h fflas-ffpack/fflas-ffpack-optimise.h ; - dnl append new definition - cat WinoThreshold >> fflas-ffpack/fflas-ffpack-optimise.h ; - dnl close the file - echo "#endif // optimise.h" >> fflas-ffpack/fflas-ffpack-optimise.h - dnl echo done : `cat WinoThreshold` - WINOT=`cat WinoThreshold | awk 'NR==2' | awk '{print $ 3}'` - dnl cleaning service ! 
- rm WinoThreshold ; - AC_MSG_RESULT(done (${WINOT})) - ],[ - AC_MSG_RESULT(problem) - break - ],[ - AC_MSG_RESULT(cross compilation) - break - ]) - -dnl for WinoThreshold for float -echo " == Wino/BLAS threshold for Givaro::Modular == " -CXXFLAGS="${CXXFLAGS_ALL} -DFLTTYPE=Givaro::Modular ${ADDFLAGS}" -AC_RUN_IFELSE([AC_LANG_SOURCE([${WINO}])],[ - dnl remove last line - dnl sed -i '$ d' fflas-ffpack/fflas-ffpack-optimise.h ; - sed '$d' fflas-ffpack/fflas-ffpack-optimise.h > fflas-ffpack/fflas-ffpack-optimise.back.h ; - mv fflas-ffpack/fflas-ffpack-optimise.back.h fflas-ffpack/fflas-ffpack-optimise.h ; - dnl append new definition - cat WinoThreshold >> fflas-ffpack/fflas-ffpack-optimise.h ; - dnl close the file - echo "#endif // optimise.h" >> fflas-ffpack/fflas-ffpack-optimise.h - dnl echo done : `cat WinoThreshold` - WINOT=`cat WinoThreshold | awk 'NR==2' | awk '{print $ 3}'` - dnl cleaning service ! - rm WinoThreshold ; - AC_MSG_RESULT(done (${WINOT})) - ],[ - AC_MSG_RESULT(problem) - break - ],[ - AC_MSG_RESULT(cross compilation) - break - ]) - -dnl for Wino threshold for double -echo " == Wino/BLAS threshold for Givaro::ModularBalanced == " -CXXFLAGS="${CXXFLAGS_ALL} -DFLTTYPE=Givaro::ModularBalanced ${ADDFLAGS}" -AC_RUN_IFELSE([AC_LANG_SOURCE([${WINO}])],[ - dnl remove last line - dnl sed -i '$d' fflas-ffpack/fflas-ffpack-optimise.h ; - dnl -i does not work on BSD sed - sed '$d' fflas-ffpack/fflas-ffpack-optimise.h > fflas-ffpack/fflas-ffpack-optimise.back.h ; - mv fflas-ffpack/fflas-ffpack-optimise.back.h fflas-ffpack/fflas-ffpack-optimise.h ; - dnl append new definition - cat WinoThreshold >> fflas-ffpack/fflas-ffpack-optimise.h ; - dnl close the file - echo "#endif // optimise.h" >> fflas-ffpack/fflas-ffpack-optimise.h - dnl cleaning service ! 
- WINOT=`cat WinoThreshold | awk 'NR==2' | awk '{print $ 3}'` - dnl echo done : `cat WinoThreshold` - rm WinoThreshold ; - AC_MSG_RESULT(done (${WINOT})) - ],[ - AC_MSG_RESULT(problem) - break - ],[ - AC_MSG_RESULT(cross compilation) - break - ]) - -dnl for WinoThreshold for float -echo " == Wino/BLAS threshold for Givaro::ModularBalanced == " -CXXFLAGS="${CXXFLAGS_ALL} -DFLTTYPE=Givaro::ModularBalanced ${ADDFLAGS}" -AC_RUN_IFELSE([AC_LANG_SOURCE([${WINO}])],[ - dnl remove last line - dnl sed -i '$ d' fflas-ffpack/fflas-ffpack-optimise.h ; - sed '$d' fflas-ffpack/fflas-ffpack-optimise.h > fflas-ffpack/fflas-ffpack-optimise.back.h ; - mv fflas-ffpack/fflas-ffpack-optimise.back.h fflas-ffpack/fflas-ffpack-optimise.h ; - dnl append new definition - cat WinoThreshold >> fflas-ffpack/fflas-ffpack-optimise.h ; - dnl close the file - echo "#endif // optimise.h" >> fflas-ffpack/fflas-ffpack-optimise.h - dnl echo done : `cat WinoThreshold` - WINOT=`cat WinoThreshold | awk 'NR==2' | awk '{print $ 3}'` - dnl cleaning service ! - rm WinoThreshold ; - AC_MSG_RESULT(done (${WINOT})) - ],[ - AC_MSG_RESULT(problem) - break - ],[ - AC_MSG_RESULT(cross compilation) - break - ]) -LD_RUN_PATH="$saved_LD_RUN_PATH" -unset givaro_lib_path -], -[AC_MSG_RESULT(no optimization)] -) - -]) diff --git a/macros/instr_set.m4 b/macros/instr_set.m4 deleted file mode 100644 index 85c667a7c..000000000 --- a/macros/instr_set.m4 +++ /dev/null @@ -1,103 +0,0 @@ -# Copyright(c)'1994-2017 by The Givaro group -# This file is part of Givaro. -# Givaro is governed by the CeCILL-B license under French law -# and abiding by the rules of distribution of free software. -# see the COPYRIGHT file for more details. 
-# -# Author Clement Pernet -#/ - -AC_DEFUN([INSTR_SET], -[ - SIMD_CFLAGS="" - AS_ECHO("Detecting SIMD instruction set") - - AC_ARG_ENABLE(sse,[AC_HELP_STRING([--disable-sse], [ disable SSE instruction set (enabled by default when available)])],[],[]) - AC_ARG_ENABLE(sse2,[AC_HELP_STRING([--disable-sse2], [ disable SSE2 instruction set (enabled by default when available)])],[],[]) - AC_ARG_ENABLE(sse3,[AC_HELP_STRING([--disable-sse3], [ disable SSE3 instruction set (enabled by default when available)])],[],[]) - AC_ARG_ENABLE(ssse3,[AC_HELP_STRING([--disable-ssse3], [ disable SSSE3 instruction set (enabled by default when available)])],[],[]) - AC_ARG_ENABLE(sse41,[AC_HELP_STRING([--disable-sse41], [ disable SSE4.1 instruction set (enabled by default when available)])],[],[]) - AC_ARG_ENABLE(sse42,[AC_HELP_STRING([--disable-sse42], [ disable SSE4.2 instruction set (enabled by default when available)])],[],[]) - AC_ARG_ENABLE(avx,[AC_HELP_STRING([--disable-avx], [ disable AVX instruction set (enabled by default when available)])],[],[]) - AC_ARG_ENABLE(avx2,[AC_HELP_STRING([--disable-avx2], [ disable AVX2 instruction set (enabled by default when available)])],[],[]) - AC_ARG_ENABLE(avx512f, [AC_HELP_STRING([--disable-avx512f], [ disable AVX512F instruction set (enabled by default when available)])],[],[]) - AC_ARG_ENABLE(avx512dq, [AC_HELP_STRING([--disable-avx512dq], [ disable AVX512DQ instruction set (enabled by default when available)])],[],[]) - AC_ARG_ENABLE(avx512vl, [AC_HELP_STRING([--disable-avx512vl], [ disable AVX512VL instruction set (enabled by default when available)])],[],[]) - AC_ARG_ENABLE(fma,[AC_HELP_STRING([--disable-fma], [ disable FMA instruction set (enabled by default when available)])],[],[]) - AC_ARG_ENABLE(fma4,[AC_HELP_STRING([--disable-fma4], [ disable FMA4 instruction set (enabled by default when available)])],[],[]) - - BACKUP_CPPFLAGS=${CPPFLAGS} - CPPFLAGS=-I${srcdir} - AC_TRY_RUN([ - #include "macros/CodeChunk/instrset_detect.cpp" - 
// increment by one to distinguish from compilation failure error code - int main(){return instrset_detect()+1;} - ],[AS_ECHO("Using 80386 instruction set")],[ - iset=$? - AS_IF([ test "$iset" -ge "2" -a "x$enable_sse" != "xno" ], [ - AS_ECHO("SSE enabled") - SIMD_CFLAGS="${SIMD_CFLAGS} -msse" - HAVE_SSE="yes" - ],[AS_ECHO("SSE disabled")]) - AS_IF([ test "$iset" -ge "3" -a "x$enable_sse2" != "xno" ], [ - AS_ECHO("SSE2 enabled") - SIMD_CFLAGS="${SIMD_CFLAGS} -msse2" - ],[AS_ECHO("SSE2 disabled")]) - AS_IF([ test "$iset" -ge "4" -a "x$enable_sse3" != "xno" ], [ - AS_ECHO("SSE3 enabled") - SIMD_CFLAGS="${SIMD_CFLAGS} -msse3" - ],[AS_ECHO("SSE3 disabled")]) - AS_IF([ test "$iset" -ge "5" -a "x$enable_ssse3" != "xno" ], [ - AS_ECHO("SSSE3 enabled") - SIMD_CFLAGS="${SIMD_CFLAGS} -mssse3" - ],[AS_ECHO("SSSE3 disabled")]) - AS_IF([ test "$iset" -ge "6" -a "x$enable_sse41" != "xno" ], [ - AS_ECHO("SSE4.1 enabled") - SIMD_CFLAGS="${SIMD_CFLAGS} -msse4.1" - ],[AS_ECHO("SSE4.1 disabled")]) - AS_IF([ test "$iset" -ge "7" -a "x$enable_sse42" != "xno" ], [ - AS_ECHO("SSE4.2 enabled") - SIMD_CFLAGS="${SIMD_CFLAGS} -msse4.2" - ],[AS_ECHO("SSE4.2 disabled")]) - AS_IF([ test "$iset" -ge "8" -a "x$enable_avx" != "xno" ], [ - AS_ECHO("AVX enabled") - SIMD_CFLAGS="${SIMD_CFLAGS} -mavx" - ],[AS_ECHO("AVX disabled")]) - AS_IF([ test "$iset" -ge "9" -a "x$enable_avx2" != "xno" ], [ - AS_ECHO("AVX2 enabled") - SIMD_CFLAGS="${SIMD_CFLAGS} -mavx2" - ],[AS_ECHO("AVX2 disabled")]) - AS_IF([ test "$iset" -ge "10" -a "x$enable_avx512f" != "xno" ], [ - AS_ECHO("AVX512F enabled") - SIMD_CFLAGS="${SIMD_CFLAGS} -mavx512f" - ],[AS_ECHO("AVX512F disabled")]) - AS_IF([ test "$iset" -ge "11" -a "x$enable_avx512vl" != "xno" ], [ - AS_ECHO("AVX512VL enabled") - SIMD_CFLAGS="${SIMD_CFLAGS} -mavx512vl" - ],[AS_ECHO("AVX512VL disabled")]) - AS_IF([ test "$iset" -ge "12" -a "x$enable_avx512dq" != "xno" ], [ - AS_ECHO("AVX512DQ enabled") - SIMD_CFLAGS="${SIMD_CFLAGS} -mavx512dq" - ],[AS_ECHO("AVX512DQ 
disabled")]) - ]) - AC_TRY_RUN([ - #include "macros/CodeChunk/instrset_detect.cpp" - int main(){return !hasFMA3();} - ],[ - AS_IF([ test "x$enable_fma" != "xno" ], [ - AS_ECHO("FMA3 enabled") - SIMD_CFLAGS="${SIMD_CFLAGS} -mfma" - ],[AS_ECHO("FMA3 disabled")]) - ],[AS_ECHO("FMA3 disabled")]) - AC_TRY_RUN([ - #include "macros/CodeChunk/instrset_detect.cpp" - int main(){return !hasFMA4();} - ],[ - AS_IF([ test "x$enable_fma4" != "xno" ], [ - AS_ECHO("FMA4 enabled") - SIMD_CFLAGS="${SIMD_CFLAGS} -mfma4" - ],[AS_ECHO("FMA4 disabled")]) - ],[AS_ECHO("FMA4 disabled")]) - CPPFLAGS=${BACKUP_CPPFLAGS} - -]) diff --git a/macros/simd-check.m4 b/macros/simd-check.m4 deleted file mode 100644 index 4fe8d11fa..000000000 --- a/macros/simd-check.m4 +++ /dev/null @@ -1,133 +0,0 @@ -dnl Check for SIMD -dnl Copyright (c) 2011 FFLAS-FFPACK -dnl Created by BB, 2014-03-25 -dnl modified by CP, 2016-07-11 -dnl ========LICENCE======== -dnl This file is part of the library FFLAS-FFPACK. -dnl -dnl FFLAS-FFPACK is free software: you can redistribute it and/or modify -dnl it under the terms of the GNU Lesser General Public -dnl License as published by the Free Software Foundation; either -dnl version 2.1 of the License, or (at your option) any later version. -dnl -dnl This library is distributed in the hope that it will be useful, -dnl but WITHOUT ANY WARRANTY; without even the implied warranty of -dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -dnl Lesser General Public License for more details. 
-dnl -dnl You should have received a copy of the GNU Lesser General Public -dnl License along with this library; if not, write to the Free Software -dnl Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -dnl ========LICENCE======== -dnl - -dnl FF_CHECK_SIMD -dnl -dnl turn on SSE4.1 AVX, AVX2 extensions if available - -AC_DEFUN([FF_CHECK_SIMD], -[ - AC_ARG_ENABLE(simd,[AC_HELP_STRING([--disable-simd], [ Disable vectorized instructions: SSE4.1, AVX, AVX2])]) - AS_IF([ test "x$enable_simd" != "xno" ], - [ - AS_ECHO("SIMD enabled") - arch=`echo $target | cut -d"-" -f1` - # if we are on a x86 (32 or 64 bits) with gcc>=4.8 then run the AX_CHECK_X86_FEATURES macro - AS_IF([test "x$arch" = "xx86_64" -o "x$arch" = "xi686"], - [archx86="yes"], - [archx86="no"] - ) - AS_IF([ test "x${CCNAM:0:3}" != "xgcc" -o "x$archx86" = "xno" ], - [ - CUSTOM_SIMD="yes" - echo "Compiling with $CCNAM for a $arch target: running custom checks for SSE4.1 and AVX1,2" - AC_MSG_CHECKING(for SSE 4.1) - BACKUP_CXXFLAGS=${CXXFLAGS} - SSEFLAGS="-msse4.1" - CXXFLAGS="${BACKUP_CXXFLAGS} ${SSEFLAGS}" - CODE_SSE=`cat macros/CodeChunk/sse.C` - AC_TRY_RUN([ ${CODE_SSE} ], - [ sse_found="yes" ], - [ sse_found="no" ], - [ - echo "cross compiling...disabling" - sse_found="no" - ]) - AS_IF([ test "x$sse_found" = "xyes" ], - [ - AC_SUBST(SSEFLAGS) - AC_MSG_RESULT(yes) - ], - [ - SSEFLAGS="" - AC_MSG_RESULT(no) - ]) - CXXFLAGS=${BACKUP_CXXFLAGS} - - dnl Check for AVX - AC_MSG_CHECKING(for AVX) - CODE_AVX=`cat macros/CodeChunk/avx.C` - dnl Intel compilers usually do not require option to enable avx - dnl Thus, we test with no option on - for switch_avxflags in "" "-mavx"; do - CXXFLAGS="${BACKUP_CXXFLAGS} -O0 ${switch_avxflags}" - AC_TRY_RUN([ ${CODE_AVX} ], - [ - avx_found="yes" - AVXFLAGS=${switch_avxflags} - break - ], - [ avx_found="no" ], - [ - echo "cross compiling...disabling" - avx_found="no" - break - ]) - done - - dnl Is AVX found? 
- AS_IF([ test "x$avx_found" = "xyes" ], - [ - AC_MSG_RESULT(yes) - dnl Check for AVX2 - AC_MSG_CHECKING(for AVX2) - for switch_avx2flags in "" "-mfma -mavx2"; do - CXXFLAGS="${BACKUP_CXXFLAGS} -O0 ${switch_avx2flags}" - AC_TRY_RUN( - [ - #define __try_avx2 - ${CODE_AVX} - ], - [ - avx2_found="yes" - AVX2FLAGS="${switch_avx2flags}" - break - ], - [ avx2_found="no" ], - [ - echo "cross compiling...disabling" - avx2_found = "no" - break - ]) - done - - dnl Is AVX2 found? - AS_IF([ test "x$avx2_found" = "xyes" ], - [ - AC_MSG_RESULT(yes) - AVXFLAGS=${AVX2FLAGS} - ], - [ AC_MSG_RESULT(no) ] - ) - ], - [ - dnl No AVX - AC_MSG_RESULT(no) - ]) - - CXXFLAGS=${BACKUP_CXXFLAGS} - ], - [ ]) - ],[ AS_ECHO("SIMD disabled") - CUSTOM_SIMD="yes" ]) -]) diff --git a/macros/sse2-check.m4 b/macros/sse2-check.m4 deleted file mode 100644 index ea39874e4..000000000 --- a/macros/sse2-check.m4 +++ /dev/null @@ -1,68 +0,0 @@ -dnl Check for SSE -dnl Copyright (c) 2011 FFLAS-FFPACK -dnl Created by BB, 2014-03-25 -dnl ========LICENCE======== -dnl This file is part of the library FFLAS-FFPACK. -dnl -dnl FFLAS-FFPACK is free software: you can redistribute it and/or modify -dnl it under the terms of the GNU Lesser General Public -dnl License as published by the Free Software Foundation; either -dnl version 2.1 of the License, or (at your option) any later version. -dnl -dnl This library is distributed in the hope that it will be useful, -dnl but WITHOUT ANY WARRANTY; without even the implied warranty of -dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -dnl Lesser General Public License for more details. 
-dnl -dnl You should have received a copy of the GNU Lesser General Public -dnl License along with this library; if not, write to the Free Software -dnl Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -dnl ========LICENCE======== -dnl - -dnl FF_CHECK_SSE -dnl -dnl turn on SSE4.1 extensions if available - -AC_DEFUN([FF_CHECK_SSE], - [ - AC_ARG_ENABLE(sse,[AC_HELP_STRING([--disable-sse], [ Disable Intel(r) SSE 4.1])]) - AC_MSG_CHECKING(for SSE 4.1) - AS_IF([ test "x$enable_sse" != "xno" ], - [ - BACKUP_CXXFLAGS=${CXXFLAGS} - dnl SSEFLAGS="-msse2" - SSEFLAGS="-msse4.1" - CXXFLAGS="${BACKUP_CXXFLAGS} ${SSEFLAGS}" - CODE_SSE=`cat macros/CodeChunk/sse.C` - AC_TRY_RUN([ - ${CODE_SSE} - ], - [ sse_found="yes" ], - [ sse_found="no" ], - [ - echo "cross compiling...disabling" - sse_found="no" - ]) - AS_IF([ test "x$sse_found" = "xyes" ],[ - AC_DEFINE(HAVE_SSE4_1_INSTRUCTIONS,1,[Define if SSE is available]) - AC_SUBST(SSEFLAGS) - AC_MSG_RESULT(yes) - ], - [ - SSEFLAGS="" - dnl Forcing to disable AVX - enable_avx="no" - AC_MSG_RESULT(no) - ] - ) - CXXFLAGS=${BACKUP_CXXFLAGS} - ], - [ - dnl --disable-sse - AC_MSG_RESULT(no [disabled]) - dnl Forcing to disable AVX - enable_avx="no" - ] - ) - ]) diff --git a/tests/Makefile.am b/tests/Makefile.am index 4326820ec..e9afbcacd 100755 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -24,7 +24,13 @@ SUBDIRS = data check: $(BASE_TESTS) -AM_CXXFLAGS = ${DEFAULT_CFLAGS} -I$(top_srcdir) $(PRECOMPILE_FLAGS) +AM_CXXFLAGS = $(FFLASFFPACK_CXXFLAGS) $(GIVARO_CFLAGS) $(BLAS_CFLAGS) $(PARFLAGS) $(PRECOMPILE_FLAGS) +AM_CPPFLAGS = -I$(top_srcdir) +LDADD = $(GIVARO_LIBS) $(BLAS_LIBS) $(PARLIBS) +if FFLASFFPACK_PRECOMPILED +LDADD += $(top_builddir)/fflas-ffpack/interfaces/libs/libfflas.la +LDADD += $(top_builddir)/fflas-ffpack/interfaces/libs/libffpack.la +endif PERFPUBLISHERFILE=tests-report.xml @@ -71,18 +77,12 @@ BASIC_TESTS = \ regression-check if FFLASFFPACK_PRECOMPILED -LDADD = $(CBLAS_LIBS) $(GIVARO_LIBS) 
$(CUDA_LIBS) $(PARLIBS) \ - $(top_builddir)/fflas-ffpack/interfaces/libs/libfflas.la \ - $(top_builddir)/fflas-ffpack/interfaces/libs/libffpack.la - INTERFACE_TESTS= test-interfaces-c -test_interfaces_c_CFLAGS = ${DEFAULT_CFLAGS} -I$(top_srcdir) $(PRECOMPILE_FLAGS) +test_interfaces_c_CFLAGS = $(AM_CXXFLAGS) test_interfaces_c_LDADD = \ $(top_builddir)/fflas-ffpack/interfaces/libs/libfflas_c.la \ $(top_builddir)/fflas-ffpack/interfaces/libs/libffpack_c.la \ -lm -lstdc++ -else -LDADD = $(CBLAS_LIBS) $(GIVARO_LIBS) $(CUDA_LIBS) $(PARLIBS) endif NOT_A_TEST = \ @@ -165,9 +165,6 @@ test_fgemv_SOURCES = test-fgemv.C #test_pfgemm_DSL_SOURCES = test-pfgemm-DSL.C -dense_generator: dense_generator.C - $(CXX) $(CXXFLAGS) $(AM_CXXFLAGS) dense_generator.C -o dense_generator - # Perfpublisher script interaction - AB 2014/11/17 perfpublisher: +./perfpublisher.sh "$(PERFPUBLISHERFILE)" "$(EXTRA_PROGRAMS)" "$(CXX)" diff --git a/tests/dense_generator.C b/tests/dense_generator.C deleted file mode 100644 index ab0ca80bb..000000000 --- a/tests/dense_generator.C +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (c) FFLAS-FFPACK - * Written by Clement Pernet - * - * ========LICENCE======== - * This file is part of the library FFLAS-FFPACK. - * - * FFLAS-FFPACK is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - * ========LICENCE======== - */ -#include -#include -#include - - - -template -T& myrand (T& r, long size) -{ - if (size < 0) - return r = T( (lrand48() % (-size-size)) + size ); - else - return r = T( lrand48() % size ) ; -}; - - -int main(int argc, char ** argv) -{ - - srand48(time(NULL)); - long ni=10,nj=10,max=100; - int offset = 0; - - if (argc > ++offset) - ni = atoi( argv[offset] ); - if (argc > ++offset) - nj = atoi( argv[offset] ); - if (argc > ++offset) - max = atoi( argv[offset] ); - - long tmp; - printf("%ld %ld M\n", ni, nj); - for (long i = 0; i < ni; ++i) - for (long j = 0; j < nj; ++j){ - printf("%ld %ld %ld\n", i+1, j+1, myrand(tmp, max)); - } - - printf("0 0 0\n"); - - return 0; -} -/* -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ -// vim:sts=4:sw=4:ts=4:et:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s diff --git a/tutorials/Makefile.am b/tutorials/Makefile.am index 7e42edc08..2133dacb8 100644 --- a/tutorials/Makefile.am +++ b/tutorials/Makefile.am @@ -23,14 +23,12 @@ tutorials: $(TUTORIALS) -AM_CXXFLAGS = @DEFAULT_CFLAGS@ -I$(top_srcdir) $(PRECOMPILE_FLAGS) - +AM_CXXFLAGS = $(FFLASFFPACK_CXXFLAGS) $(GIVARO_CFLAGS) $(BLAS_CFLAGS) $(PARFLAGS) $(PRECOMPILE_FLAGS) +AM_CPPFLAGS = -I$(top_srcdir) +LDADD = $(GIVARO_LIBS) $(BLAS_LIBS) $(PARLIBS) if FFLASFFPACK_PRECOMPILED -LDADD = $(CBLAS_LIBS) $(GIVARO_LIBS) $(CUDA_LIBS) $(PARLIBS) \ - $(top_builddir)/fflas-ffpack/interfaces/libs/libfflas.la \ - $(top_builddir)/fflas-ffpack/interfaces/libs/libffpack.la -else -LDADD = $(CBLAS_LIBS) $(GIVARO_LIBS) $(CUDA_LIBS) $(PARLIBS) +LDADD += $(top_builddir)/fflas-ffpack/interfaces/libs/libfflas.la +LDADD += $(top_builddir)/fflas-ffpack/interfaces/libs/libffpack.la endif TUTORIALS = fflas_101 fflas_101_lvl1 fflas-101_1 
fflas-101_3 2x2-fgemm 101-fgemm 2x2-pluq 2x2-ftrsv ffpack-solve ffpack-fgesv