Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
jeffhammond committed Sep 1, 2023
1 parent f2c1e1e commit f6607c0
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 8 deletions.
10 changes: 5 additions & 5 deletions common/make.defs.cuda
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ OPENACCFLAG=-fopenacc
#
# OpenCL flags
#
OPENCLDIR=/usr/local/cuda-11.2/targets/x86_64-linux
OPENCLDIR=/usr/local/cuda-12.1/targets/x86_64-linux
OPENCLFLAG=-I${OPENCLDIR}/include -L${OPENCLDIR}/lib64 -lOpenCL
#OPENCLFLAG+=-Wno-ignored-attributes -Wno-deprecated-declarations
#OPENCLFLAG+=-Wno-deprecated-declarations -Wno-missing-braces
Expand Down Expand Up @@ -126,8 +126,8 @@ THRUSTFLAG=-I${THRUSTDIR} ${RANGEFLAG}
#
# CBLAS for C++ DGEMM
#
#BLASFLAG=
#CBLASFLAG=
BLASFLAG=-L/usr/lib/x86_64-linux-gnu/blis-openmp -lblis
CBLASFLAG=${BLASFLAG}
#
# CUDA flags
#
Expand All @@ -136,9 +136,9 @@ THRUSTFLAG=-I${THRUSTDIR} ${RANGEFLAG}
# Linux w/ NVIDIA CUDA
# NVCC never supports the latest GCC.
# Specify the appropriate GPU architecture; otherwise the code is compiled for an ancient feature set.
NVCC=/usr/local/cuda-11.2/bin/nvcc
NVCC=/usr/local/cuda-12.1/bin/nvcc
CUDAFLAGS=-g -O3 -std=c++11
CUDAFLAGS+=--gpu-architecture=sm_70
CUDAFLAGS+=--gpu-architecture=sm_89
#CUDAFLAGS+=--compiler-bindir=/swtools/gcc/7.5.0/bin
#CUDAFLAGS+=-forward-unknown-to-host-compiler -fopenmp
CUDAFLAGS+=-rdc=true # FIXES ptxas fatal : Unresolved extern function 'cudaCGGetIntrinsicHandle'
Expand Down
6 changes: 3 additions & 3 deletions common/make.defs.nvhpc
Original file line number Diff line number Diff line change
Expand Up @@ -84,12 +84,12 @@ CBLASFLAG=${BLASFLAG}
NVCC=${NVHPC_CBIN}nvcc
CUDAFLAGS=-g -O3 -std=c++17
CUDAFLAGS+=--extended-lambda
CUDAFLAGS+=--gpu-architecture=sm_80
CUDAFLAGS+=--gpu-architecture=sm_89
#CUDAFLAGS+=--compiler-bindir=/swtools/gcc/7.5.0/bin
#CUDAFLAGS+=-forward-unknown-to-host-compiler -fopenmp
CUDAFLAGS+=-rdc=true # FIXES ptxas fatal : Unresolved extern function 'cudaCGGetIntrinsicHandle'
CUDAFLAGS+=-I${NVHPC_PATH}/math_libs/11.5/targets/$$(uname -m)-linux/include
CUDAFLAGS+=-L${NVHPC_PATH}/math_libs/11.5/targets/$$(uname -m)-linux/lib
CUDAFLAGS+=-I${NVHPC_PATH}/math_libs/12.1/targets/$$(uname -m)-linux/include
CUDAFLAGS+=-L${NVHPC_PATH}/math_libs/12.1/targets/$$(uname -m)-linux/lib
# https://github.com/tensorflow/tensorflow/issues/1066#issuecomment-200574233
# heavy hammer:
CUDAFLAGS+=-D_X86INTRIN_H_INCLUDED
Expand Down

0 comments on commit f6607c0

Please sign in to comment.